[PATCH v2 1/2] btrfs: Add WARN_ON for qgroup reserved underflow

2016-10-19 Thread Qu Wenruo
Goldwyn Rodrigues has exposed and fixed a bug which underflows btrfs
qgroup reserved space, and leads to non-writable fs.

This reminds us that we don't have enough underflow checks for qgroup
reserved space.

In the underflow case, we should not actually underflow the numbers, but
warn and keep qgroup working.

So add more checks on qgroup reserved space, and add WARN_ON() and
btrfs_warn() for any underflow case.

Signed-off-by: Qu Wenruo 
---
Changelog:
v2:
  Add btrfs_warn() output for fsid, qgroupid, original reserved space and
  num_bytes to reduce, for end-user to locate the subvolume causing the
  problem. Suggested by David.
---
 fs/btrfs/qgroup.c | 32 +++-
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 11f4fff..fc0c64e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1031,6 +1031,15 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
list_add(>dirty, _info->dirty_qgroups);
 }
 
+static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup,
+ u64 num_bytes)
+{
+   btrfs_warn(fs_info,
+   "qgroup %llu reserved space underflow, have: %llu, to free: 
%llu",
+   qgroup->qgroupid, qgroup->reserved, num_bytes);
+   qgroup->reserved = 0;
+}
 /*
  * The easy accounting, if we are adding/removing the only ref for an extent
  * then this qgroup and all of the parent qgroups get their reference and
@@ -1058,8 +1067,13 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info 
*fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
-   if (sign > 0)
-   qgroup->reserved -= num_bytes;
+   if (sign > 0) {
+   if (WARN_ON(qgroup->reserved < num_bytes))
+   report_reserved_underflow(fs_info, qgroup,
+ num_bytes);
+   else
+   qgroup->reserved -= num_bytes;
+   }
 
qgroup_dirty(fs_info, qgroup);
 
@@ -1079,8 +1093,13 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info 
*fs_info,
qgroup->rfer_cmpr += sign * num_bytes;
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
-   if (sign > 0)
-   qgroup->reserved -= num_bytes;
+   if (sign > 0) {
+   if (WARN_ON(qgroup->reserved < num_bytes))
+   report_reserved_underflow(fs_info, qgroup,
+ num_bytes);
+   else
+   qgroup->reserved -= num_bytes;
+   }
qgroup->excl_cmpr += sign * num_bytes;
qgroup_dirty(fs_info, qgroup);
 
@@ -2204,7 +2223,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info 
*fs_info,
 
qg = u64_to_ptr(unode->aux);
 
-   qg->reserved -= num_bytes;
+   if (WARN_ON(qgroup->reserved < num_bytes))
+   report_reserved_underflow(fs_info, qgroup, num_bytes);
+   else
+   qgroup->reserved -= num_bytes;
 
list_for_each_entry(glist, >groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
-- 
2.10.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] btrfs: Add trace point for qgroup reserved space

2016-10-19 Thread Qu Wenruo
Introduce the following trace points:
qgroup_update_reserve
qgroup_meta_reserve

And modify the timing of btrfs_qgroup_free_delayed_ref() and
btrfs_qgroup_release_data() events, to work with qgroup_update_reserve()
event.

Signed-off-by: Qu Wenruo 
---
 fs/btrfs/qgroup.c| 21 ++---
 fs/btrfs/qgroup.h|  2 +-
 include/trace/events/btrfs.h | 43 +++
 3 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fc0c64e..aad34314 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1068,6 +1068,8 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info 
*fs_info,
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
if (sign > 0) {
+   trace_qgroup_update_reserve(fs_info, qgroup->qgroupid,
+   qgroup->reserved, (s64)-num_bytes);
if (WARN_ON(qgroup->reserved < num_bytes))
report_reserved_underflow(fs_info, qgroup,
  num_bytes);
@@ -1094,6 +1096,9 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info 
*fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
if (sign > 0) {
+   trace_qgroup_update_reserve(fs_info, qgroup->qgroupid,
+   qgroup->reserved,
+   (s64)-num_bytes);
if (WARN_ON(qgroup->reserved < num_bytes))
report_reserved_underflow(fs_info, qgroup,
  num_bytes);
@@ -2178,6 +2183,8 @@ static int qgroup_reserve(struct btrfs_root *root, u64 
num_bytes)
 
qg = u64_to_ptr(unode->aux);
 
+   trace_qgroup_update_reserve(fs_info, qg->qgroupid,
+   qg->reserved, num_bytes);
qg->reserved += num_bytes;
}
 
@@ -2223,6 +2230,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info 
*fs_info,
 
qg = u64_to_ptr(unode->aux);
 
+   trace_qgroup_update_reserve(fs_info, qg->qgroupid,
+   qg->reserved, (s64)-num_bytes);
if (WARN_ON(qgroup->reserved < num_bytes))
report_reserved_underflow(fs_info, qgroup, num_bytes);
else
@@ -2651,12 +2660,12 @@ static int __btrfs_qgroup_release_data(struct inode 
*inode, u64 start, u64 len,
if (ret < 0)
goto out;
 
-   if (free) {
-   qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+   if (free)
trace_op = QGROUP_FREE;
-   }
trace_btrfs_qgroup_release_data(inode, start, len,
changeset.bytes_changed, trace_op);
+   if (free)
+   qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
 out:
ulist_free(changeset.range_changed);
return ret;
@@ -2706,6 +2715,8 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, 
int num_bytes)
return 0;
 
BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
+   trace_qgroup_meta_reserve(root->fs_info, root->objectid,
+ (s64)num_bytes);
ret = qgroup_reserve(root, num_bytes);
if (ret < 0)
return ret;
@@ -2724,6 +2735,8 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
reserved = atomic_xchg(>qgroup_meta_rsv, 0);
if (reserved == 0)
return;
+   trace_qgroup_meta_reserve(root->fs_info, root->objectid,
+ (s64)-reserved);
qgroup_free(root, reserved);
 }
 
@@ -2736,6 +2749,8 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int 
num_bytes)
BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
WARN_ON(atomic_read(>qgroup_meta_rsv) < num_bytes);
atomic_sub(num_bytes, >qgroup_meta_rsv);
+   trace_qgroup_meta_reserve(root->fs_info, root->objectid,
+ (s64)-num_bytes);
qgroup_free(root, num_bytes);
 }
 
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 1bc64c8..6b6756c 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -114,8 +114,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info 
*fs_info,
 static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
 u64 ref_root, u64 num_bytes)
 {
-   btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
+   btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
 }
 void 

[PATCH v9.1 0/5] In-band de-duplication for btrfs-progs

2016-10-19 Thread Qu Wenruo
Patchset can be fetched from github:
https://github.com/adam900710/btrfs-progs.git dedupe_latest

Inband dedupe(in-memory backend only) ioctl support for btrfs-progs.

Users/reviewers/testers can still use the previous btrfs-progs patchset to test;
this update just cleans up unsupported functions, such as the on-disk
backend and any on-disk format change.

v7:
   Update ctree.h to follow kernel structure change
   Update print-tree to follow kernel structure change
V8:
   Move dedup props and on-disk backend support out of the patchset
   Change command group name to "dedupe-inband", to avoid confusion with
   possible out-of-band dedupe. Suggested by Mark.
   Rebase to latest devel branch.
V9:
   Follow the kernel's ioctl change to support the FORCE flag, the new reconf
   ioctl, and more precise error reporting.
v9.1:
   Rebased to v4.8.1.


Qu Wenruo (5):
  btrfs-progs: Basic framework for dedupe-inband command group
  btrfs-progs: dedupe: Add enable command for dedupe command group
  btrfs-progs: dedupe: Add disable support for inband dedupelication
  btrfs-progs: dedupe: Add status subcommand
  btrfs-progs: dedupe: introduce reconfigure subcommand

 Documentation/Makefile.in  |   1 +
 Documentation/btrfs-dedupe-inband.asciidoc | 167 +++
 Documentation/btrfs.asciidoc   |   4 +
 Makefile.in|   3 +-
 btrfs-completion   |   6 +-
 btrfs.c|   2 +
 cmds-dedupe-ib.c   | 436 +
 commands.h |   2 +
 dedupe-ib.h|  41 +++
 ioctl.h|  37 +++
 10 files changed, 697 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/btrfs-dedupe-inband.asciidoc
 create mode 100644 cmds-dedupe-ib.c
 create mode 100644 dedupe-ib.h

-- 
2.10.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v13.1 00/14] Btrfs In-band De-duplication

2016-10-19 Thread Qu Wenruo
This patchset can be fetched from github:
https://github.com/adam900710/linux.git wang_dedupe_latest

This version is just another small update, rebased to Chris' for-linus-4.9
branch.

Since preparation patch for sub-page size and inband dedupe is already
merged, now it's quite easy to rebase.
Only 2 small conflicts happened in this rebase.

Changelog:
v2:
  Totally reworked to handle multiple backends
v3:
  Fix a stupid but deadly on-disk backend bug
  Add handle for multiple hash on same bytenr corner case to fix abort
  trans error
  Increase dedup rate by enhancing delayed ref handler for both backend.
  Move dedup_add() to run_delayed_ref() time, to fix abort trans error.
  Increase dedup block size up limit to 8M.
v4:
  Add dedup prop for disabling dedup for given files/dirs.
  Merge inmem_search() and ondisk_search() into generic_search() to save
  some code
  Fix another delayed_ref related bug.
  Use the same mutex for both inmem and ondisk backend.
  Move dedup_add() back to btrfs_finish_ordered_io() to increase dedup
  rate.
v5:
  Reuse compress routine for much simpler dedup function.
  Slightly improved performance due to above modification.
  Fix race between dedup enable/disable
  Fix for false ENOSPC report
v6:
  Further enable/disable race window fix.
  Minor format change according to checkpatch.
v7:
  Fix one concurrency bug with balance.
  Slightly modify return value from -EINVAL to -EOPNOTSUPP for
  btrfs_dedup_ioctl() to allow progs to distinguish unsupported commands
  and wrong parameter.
  Rebased to integration-4.6.
v8:
  Rename 'dedup' to 'dedupe'.
  Add support to allow dedupe and compression work at the same time.
  Fix several balance related bugs. Special thanks to Satoru Takeuchi,
  who exposed most of them.
  Small dedupe hit case performance improvement.
v9:
  Re-order the patchset to completely separate pure in-memory and any
  on-disk format change.
  Fold bug fixes into its original patch.
v10:
  Adding back missing bug fix patch.
  Reduce on-disk item size.
  Hide dedupe ioctl under CONFIG_BTRFS_DEBUG.
v11:
  Remove other backend and props support to focus on the framework and
  in-memory backend. Suggested by David.
  Better disable and buffered write race protection.
  Comprehensive fix to dedupe metadata ENOSPC problem.
v12:
  Stateful 'enable' ioctl and new 'reconf' ioctl
  New FORCE flag for enable ioctl to allow stateless ioctl
  Precise error report and extendable ioctl structure.
v12.1
  Rebase to David's for-next-20160704 branch
  Add co-ordinate patch for subpage and dedupe patchset. 
v12.2
  Rebase to David's for-next-20160715 branch
  Add co-ordinate patch for other patchset.
v13
  Rebase to David's for-next-20160906 branch
  Fix a reserved space leak bug, which only frees quota reserved space
  but not space_info->byte_may_use.
v13.1
  Rebase to Chris' for-linus-4.9 branch

Qu Wenruo (4):
  btrfs: delayed-ref: Add support for increasing data ref under spinlock
  btrfs: dedupe: Inband in-memory only de-duplication implement
  btrfs: relocation: Enhance error handling to avoid BUG_ON
  btrfs: dedupe: Introduce new reconfigure ioctl

Wang Xiaoguang (10):
  btrfs: dedupe: Introduce dedupe framework and its header
  btrfs: dedupe: Introduce function to initialize dedupe info
  btrfs: dedupe: Introduce function to add hash into in-memory tree
  btrfs: dedupe: Introduce function to remove hash from in-memory tree
  btrfs: dedupe: Introduce function to search for an existing hash
  btrfs: dedupe: Implement btrfs_dedupe_calc_hash interface
  btrfs: ordered-extent: Add support for dedupe
  btrfs: dedupe: Add ioctl for inband dedupelication
  btrfs: improve inode's outstanding_extents computation
  btrfs: dedupe: fix false ENOSPC

 fs/btrfs/Makefile|   2 +-
 fs/btrfs/ctree.h |  23 +-
 fs/btrfs/dedupe.c| 820 +++
 fs/btrfs/dedupe.h| 201 +-
 fs/btrfs/delayed-ref.c   |  30 +-
 fs/btrfs/delayed-ref.h   |   8 +
 fs/btrfs/disk-io.c   |   4 +
 fs/btrfs/extent-tree.c   |  82 +++-
 fs/btrfs/extent_io.c |  63 ++-
 fs/btrfs/extent_io.h |  15 +-
 fs/btrfs/file.c  |  26 +-
 fs/btrfs/free-space-cache.c  |   5 +-
 fs/btrfs/inode-map.c |   4 +-
 fs/btrfs/inode.c | 423 
 fs/btrfs/ioctl.c |  93 -
 fs/btrfs/ordered-data.c  |  46 ++-
 fs/btrfs/ordered-data.h  |  14 +
 fs/btrfs/relocation.c|  44 ++-
 fs/btrfs/sysfs.c |   2 +
 fs/btrfs/tests/extent-io-tests.c |   6 +-
 include/uapi/linux/btrfs.h   |  55 +++
 21 files changed, 1823 insertions(+), 143 deletions(-)
 create mode 100644 fs/btrfs/dedupe.c

-- 
2.10.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] btrfs: Add WARN_ON for qgroup reserved underflow

2016-10-19 Thread Qu Wenruo



At 10/19/2016 10:31 PM, David Sterba wrote:

On Fri, Sep 30, 2016 at 09:15:36AM +0800, Qu Wenruo wrote:

While the reason why qgroup reserved space may underflow is still under
investigation, such WARN_ON will help us to expose the bug more easily,
and for end-user we can detect and avoid underflow.

Signed-off-by: Qu Wenruo 
---
 fs/btrfs/qgroup.c | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8db2e29..8532587 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1061,8 +1061,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info 
*fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
-   if (sign > 0)
-   qgroup->reserved -= num_bytes;
+   if (sign > 0) {
+   if (WARN_ON(qgroup->reserved < num_bytes))


That's only partially helpful, you should also print the numbers,
ref_root and/or qgroup id.



I'm OK with adding more output.
But normally it's not really helpful, though.

Thanks,
Qu


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] btrfs-progs: fsck: Add support to clear v1 free space cache.

2016-10-19 Thread Qu Wenruo



At 10/19/2016 11:13 PM, David Sterba wrote:

On Thu, Oct 13, 2016 at 05:22:26PM +0800, Qu Wenruo wrote:

The kernel clear_cache mount option will only rebuild the free space cache if
the used space of that chunk has changed.

So it won't ensure that any corrupted free space cache gets cleared.

So add a new option "--clear-space-cache v1|v2" to btrfsck, to
completely wipe out free space cache.
So kernel won't complain again.

Reported-by: Ivan P 
Signed-off-by: Qu Wenruo 
---
 Documentation/btrfs-check.asciidoc |   9 +++
 cmds-check.c   |  63 ++-
 free-space-cache.c | 124 +
 free-space-cache.h |   2 +
 4 files changed, 197 insertions(+), 1 deletion(-)

diff --git a/Documentation/btrfs-check.asciidoc 
b/Documentation/btrfs-check.asciidoc
index a32e1c7..ef1e464 100644
--- a/Documentation/btrfs-check.asciidoc
+++ b/Documentation/btrfs-check.asciidoc
@@ -78,6 +78,15 @@ respective superblock offset is within the device size
 This can be used to use a different starting point if some of the primary
 superblock is damaged.

+--clear-space-cache v1|v2::
+completely wipe out all free space cache.
+Only v1(file based) free space cache is supported yet.
++
+NOTE: Kernel mount option 'clear_cache' is only designed to rebuild free space 
cache
+which is modified during the lifetime of that mount option.
+It doesn't rebuild all free space cache, nor clear them out.
+
+
 DANGEROUS OPTIONS
 -

diff --git a/cmds-check.c b/cmds-check.c
index 670ccd1..f62fc62 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -11206,6 +11206,36 @@ out:
return bad_roots;
 }

+static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
+{
+   struct btrfs_trans_handle *trans;
+   struct btrfs_block_group_cache *bg_cache;
+   u64 current = 0;
+   int ret = 0;
+
+   /* Clear all free space cache inodes and its extent data */
+   while (1) {
+   bg_cache = btrfs_lookup_first_block_group(fs_info, current);
+   if (!bg_cache)
+   break;
+   ret = btrfs_clear_free_space_cache(fs_info, bg_cache);


The function can fail for a lot of reasons, what would be the filesystem
state when we exit here? Some of the inodes could be cleared completely,
the last one partially.  The function copes with a missing inode item
but I don't know how many other intermediate states could be left.


If we exit here, no damage will be done to the filesystem, since we are 
protected by the transaction.


As you can find, in btrfs_clear_free_space_cache(),
it will only commit transaction if we fully cleaned the whole inode and 
its free space header.


So we're quite safe here, free space header and inode are cleaned 
atomically.


PS: We really need btrfs_abort_transaction(), or when we exit abnormally 
we will get a lot of backtrace/warning on uncommitted transaction.


Thanks,
Qu




+   if (ret < 0)
+   return ret;
+   current = bg_cache->key.objectid + bg_cache->key.offset;
+   }
+
+   /* Don't forget to set cache_generation to -1 */
+   trans = btrfs_start_transaction(fs_info->tree_root, 0);
+   if (IS_ERR(trans)) {
+   error("failed to update super block cache generation");
+   return PTR_ERR(trans);
+   }
+   btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
+   btrfs_commit_transaction(trans, fs_info->tree_root);
+
+   return ret;
+}
+
 const char * const cmd_check_usage[] = {
"btrfs check [options] ",
"Check structural integrity of a filesystem (unmounted).",
@@ -11233,6 +11263,9 @@ const char * const cmd_check_usage[] = {
"-r|--tree-root  use the given bytenr for the tree root",
"--chunk-rootuse the given bytenr for the chunk tree 
root",
"-p|--progress   indicate progress",
+   "--clear-space-cache v1|v2   clear space cache for v1(file based) or ",
+   "v2(tree based).",
+   "Only support v1 yet",
NULL
 };

@@ -11250,6 +11283,7 @@ int cmd_check(int argc, char **argv)
u64 num;
int init_csum_tree = 0;
int readonly = 0;
+   int clear_space_cache = 0;
int qgroup_report = 0;
int qgroups_repaired = 0;
unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
@@ -11259,7 +11293,7 @@ int cmd_check(int argc, char **argv)
enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
-   GETOPT_VAL_MODE };
+   GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
static const struct option long_options[] = {
{ "super", 

Re: Rust library for BTRFS

2016-10-19 Thread James Pharaoh

On 15/10/16 19:41, Hans van Kranenburg wrote:

On 10/15/2016 04:31 PM, James Pharaoh wrote:

 >>

I've basically reimplemented what I find in other tools because, as far
as I can tell, there is no C library for BTRFS at this point.


For code operating on an online filesystem, using the IOCTLs, you don't
really need a lot from btrfs-progs. You need to know about some structs
and throw them back and forth to the kernel. And not being afraid of
doing some reverse engineering helps. :o)


Yeah I've managed fine so far. I feel like someone needs to split out 
this library, really, but I'm not particularly interested in doing C 
dev myself, except when it's necessary to interface with something.



The btrfs-progs programs operate in two ways:
1. Using the IOCTLs, implementing a command/args input model and
converting the output back to text on the console.
2. Also directly doing low-level plumbing, reading and writing inside
unmounted btrfs filesystems. For this, there's a copy of a bunch of code
from the kernel, to be able to read/write a filesystem that is not
mounted from the running program, in a similar way that kernel does when
it's live. If you don't plan to write your own btrfs check --repair,
this is also not what you need to import as a library.


I have realised this as well. If I understand correctly, one of the 
things I find most interesting about BTRFS is that it has a kind of 
"microkernel" approach to managing its on-disk data structures, 
where a small, well-tested core of code manages some basic storage, and 
advanced features are built on top of this.


As I understand it, the tools are able to simply read these data 
structures directly from the disk, presumably after acquiring some kind 
of "read lock" from the kernel, which is sufficient because I believe 
BTRFS is COW at a low level, so with a read lock on the "committed root" 
you are looking at, there is a guarantee all the data structures inside 
it will be valid.


Any confirmation or comment on this would be appreciated, I am planning 
to delve in further soon with my current efforts..



This might be interesting:
https://patchwork.kernel.org/patch/9356749/


Will take a look...


But, I'm not the C expert here, this is what I know.


The currently supported functions are:

- Deduplication
- File system info
- Space info


Is there a specific use case you're building this for?


I am building a dense hosting platform, a kind of alternative-to-cloud 
solution which acts/looks much more like traditional hosting, but with 
immensely better backups and space efficiency.


I am also using zbackup as a way to store a LOT of similar builds of 
entire containers (ie entire operating systems minus the kernel which is 
shared), storing them efficiently, and deploying them efficiently.



My primary trigger to start on it is best described as: "Tired of
parsing the output of btrfs  ? Try this!"

Also, if you're using IRC, #btrfs on freenode is a good place to hang out.


I have tried this but often don't get a response. I feel IRC is a bit 
packed these days and people go for the easy answers and ignore the more 
in depth ones...


Nevertheless, I've exchanged several emails with the author of ZBackup 
and have a list of issues/feature requests/suggestions which I will be 
adding to github and opening for discussion on the mailing list very soon.



Have fun,


Always!

James
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Monitoring Btrfs

2016-10-19 Thread Stefan Malte Schumacher
Thanks to everybody for the help. I have been doing regular
smart-tests for quite some time and I also let scrub run on a regular
basis.
I will use a simple shell script which greps the output of
"btrfs fi show" for lines containing "missing" and another which checks
"btrfs dev stats" for non-zero entries.

Thanks everybody for your advice.
Stefan

2016-10-17 18:44 GMT+02:00 Stefan Malte Schumacher
:
> Hello
>
> I would like to monitor my btrfs-filesystem for missing drives. On
> Debian mdadm uses a script in /etc/cron.daily, which calls mdadm and
> sends an email if anything is wrong with the array. I would like to do
> the same with btrfs. In my first attempt I grepped and cut the
> information from "btrfs fi show" and let the script send an email if
> the number of devices was not equal to the preselected number.
>
> Then I saw this:
>
> ubuntu@ubuntu:~$ sudo btrfs filesystem show
> Label: none  uuid: 67b4821f-16e0-436d-b521-e4ab2c7d3ab7
> Total devices 6 FS bytes used 5.47TiB
> devid1 size 1.81TiB used 1.71TiB path /dev/sda3
> devid2 size 1.81TiB used 1.71TiB path /dev/sdb3
> devid3 size 1.82TiB used 1.72TiB path /dev/sdc1
> devid4 size 1.82TiB used 1.72TiB path /dev/sdd1
> devid5 size 2.73TiB used 2.62TiB path /dev/sde1
> *** Some devices missing
>
> on this page: 
> https://btrfs.wiki.kernel.org/index.php/Using_Btrfs_with_Multiple_Devices
> The number of devices is still at 6, despite the fact that one of the
> drives is missing, which means that my first idea doesnt work. I have
> two questions:
> 1) Has anybody already written a script like this? After all, there is
> no need to reinvent the wheel a second time.
> 2) What should I best grep for? In this case I would just go for the
> "missing". Does this cover all possible outputs of btrfs fi show in
> case of a damaged array? What other outputs do I need to consider for
> my script.
>
> Yours sincerely
> Stefan
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Monitoring Btrfs

2016-10-19 Thread Anand Jain



On 10/19/16 21:33, Austin S. Hemmelgarn wrote:

On 2016-10-19 09:06, Anand Jain wrote:



On 10/19/16 19:15, Austin S. Hemmelgarn wrote:

On 2016-10-18 17:36, Anand Jain wrote:




I would like to monitor my btrfs-filesystem for missing drives.




This is actually correct behavior, the filesystem reports that it
should
have 6 devices, which is how it knows a device is missing.




 Missing - means missing at the time of mount. So how are you
planning
to monitor a disk which has failed while in production?



No, in `btrfs fi show` it means that it can't find the device.


 'btrfs fi show' is misleading compared to 'btrfs fi show -m'.
 -m gives the btrfs-kernel perspective of the devices; as of now
 there is no code in the kernel which changes the device status
 while it's mounted (except for readonly, which is irrelevant in
 raid1 with 1 disk failed).



Actually, that's exactly how I would expect each of them to behave.  We
need some way to get both the state the kernel thinks the FS is in, and
the state it's actually in (according to the tools, not the kernel), and
'-m' reporting kernel state while no '-m' reports actual state is
exactly what I would expect in this case.




That leads also to another way I hadn't thought of to monitor a
filesystem.  The output of 'fi show' with and without '-m' should match
if the filesystem was healthy when mounted and is still healthy, if they
don't, then something is wrong.




1. Filesystem flags.  These will change when the filesystem goes
degraded,


  Which flag is in question here. ?

I should clarify here, I mean the mount options, I'm just used to the
monit terminology (which was not well picked in this case).  The big one
to watch is the read-only flag, as BTRFS will force a filesystem
read-only (which updates the mount options).  Any change to the mount
options though without manual intervention is generally a sign that
_something_ is wrong.



 btrfs-progs shouldn't add its own intelligence in determining the
 device state, it should be a transparent tool to report status from
 the btrfs-kernel. So I am opposed to patches such as

commit 206efb60cbe3049e0d44c6da3c1909aeee18f813
btrfs-progs: Add missing devices check for mounted btrfs.

 There are many ways a device can fail/recover in the SAN environment,
 these device state managing intelligence should be at one place and
 in the kernel. The volume manager part of the code in the kernel
 is incomplete.


I don't agree that the management should be completely unified or that
the tools should just report kernel state.  The tools have to have some
way to check device state for unmounted filesystems because they have to
operate on unmounted filesystems, and because until the kernel gets
smart enough to actually handle device state properly, some method is
needed to check the actual state of the devices.  Even once the kernel
is smart enough, it's still helpful to see without mounting a filesystem
whether or not all the devices are there, and if we ever switch to a
real mount helper (which I am in favor of for multiple reasons), we'll
need device state checking in userspace for that too.



 That's a bit out of context. Here it's about monitoring devices while the FS
 is mounted; in this context, if there is a tool which applies
 its own intelligence without the kernel, then that's wrong.





Take a look at LVM.  The separation of responsibilities there is
ideally what we should be looking at long term for BTRFS.  The userspace
components tell the kernel what to do, and list both kernel state _and_
physical state in a readable manner.  The kernel tracks limited parts of
the state (only for active LV's, so the equivalent of mounted
filesystems, and even then only what it needs to track (Is this RAID
volume in sync?  Is that snapshot or thin storage pool getting close to
full?)), and sends notifications to a userspace component which then
acts on those conditions (possibly then telling the kernel what to do in
response to them).  On top of that, the userspace components don't
require a kernel which supports them for any off-line operations, and
the kernel works fine with older userspace.  Both userspace and the
kernel handle missing devices (userspace tools report them, the kernel
refuses to activate LV's that require them).
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Cross subvolume "cp --reflink"?

2016-10-19 Thread Noah Massey
On Wed, Oct 19, 2016 at 12:52 PM, Andrei Borzenkov  wrote:
> I get "Failed to clone: Invalid cross-device link". Is it expected?

Yes, you cannot cross a MOUNTPOINT boundary with reflink.
Or, for that matter, btrfs subvolume snapshot.

> Basically this is (on openSUSE TW which has root on subvolume)
>
> mount -o subvol=/ /dev/vda1 /mnt
> btrfs sub create /mnt/var/cache
> cp -a --reflink=always /var/cache/* /mnt/var/cache
>

You *can* however cross subvolume boundaries within the same mountpoint.
Try (assuming your root subvolume is @root)

cp -a --reflink /mnt/@root/var/cache/* /mnt/var/cache

~ Noah
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kernel BUG at fs/btrfs/extent_io.c:2062 (v4.2.0-rc8)

2016-10-19 Thread Dāvis Mosāns
Now when trying to read this one file on 4.8.2

[  131.497595] [ cut here ]
[  131.499411] kernel BUG at fs/btrfs/extent_io.c:2315!
[  131.500786] invalid opcode:  [#1] PREEMPT SMP
[  131.500786] Modules linked in: rpcsec_gss_krb5 auth_rpcgss
oid_registry nfsv4 dns_resolver nfs lockd grace sunrpc fscache kvm_amd
mousedev kvm irqbypass crct10dif_pclmul crc32_pclmul nls_iso8859_1
ghash_clmulni_intel nls_cp437 aesni_intel vfat snd_intel8x0 input_leds
aes_x86_64 fat ppdev evdev snd_ac97_codec led_class ac97_bus lrw
snd_pcm snd_timer gf128mul glue_helper ablk_helper acpi_cpufreq cryptd
mac_hid snd psmouse pcspkr tpm_tis intel_agp parport_pc soundcore
intel_gtt qemu_fw_cfg i2c_piix4 tpm_tis_core parport tpm button
sch_fq_codel ip_tables x_tables btrfs xor raid6_pq virtio_gpu
drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm
sd_mod virtio_balloon virtio_console virtio_blk virtio_net ata_generic
drm pata_acpi xhci_pci xhci_hcd ahci serio_raw virtio_pci atkbd libps2
libahci usbcore ata_piix virtio_ring libata scsi_mod usb_common virtio
crc32c_intel i8042 serio floppy
[  131.500786] CPU: 1 PID: 145 Comm: kworker/u12:7 Not tainted 4.8.2-1-ARCH #1
[  131.500786] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 0.0.0 02/06/2015
[  131.500786] Workqueue: btrfs-endio btrfs_endio_helper [btrfs]
[  131.500786] task: 880233f1e3c0 task.stack: 880233f3
[  131.500786] RIP: 0010:[]  []
btrfs_check_repairable+0xfc/0x100 [btrfs]
[  131.500786] RSP: 0018:880233f33cc0  EFLAGS: 00010206
[  131.500786] RAX: 0001 RBX: 8802292e2f80 RCX: 
[  131.500786] RDX: 0002 RSI:  RDI: 880236148f70
[  131.500786] RBP: 880233f33cd8 R08: 001721c0 R09: 001761c0
[  131.500786] R10: ea0008d496c0 R11: 3294a8c0 R12: 3294a8c0
[  131.500786] R13: 88023294b5a8 R14: 88023294b5a8 R15: 8802327a2830
[  131.500786] FS:  () GS:88023fc4()
knlGS:
[  131.500786] CS:  0010 DS:  ES:  CR0: 80050033
[  131.500786] CR2: 007477e0 CR3: 02806000 CR4: 000406e0
[  131.500786] Stack:
[  131.500786]  ea0008a2b240 000a3000 88023294b630
880233f33d90
[  131.500786]  a02b612f 880235d82d00 880233f33d10
880233f1e3c0
[  131.500786]  880233f1e3c0 88023294b540 8802327a2680

[  131.500786] Call Trace:
[  131.500786]  [] end_bio_extent_readpage+0x44f/0x5f0 [btrfs]
[  131.500786]  [] bio_endio+0x53/0x60
[  131.500786]  [] end_workqueue_fn+0x3c/0x40 [btrfs]
[  131.500786]  [] btrfs_scrubparity_helper+0x7d/0x340 [btrfs]
[  131.500786]  [] btrfs_endio_helper+0xe/0x10 [btrfs]
[  131.500786]  [] process_one_work+0x1e5/0x470
[  131.500786]  [] worker_thread+0x48/0x4e0
[  131.500786]  [] ? process_one_work+0x470/0x470
[  131.500786]  [] ? process_one_work+0x470/0x470
[  131.500786]  [] kthread+0xd8/0xf0
[  131.500786]  [] ? __switch_to+0x2d2/0x630
[  131.500786]  [] ret_from_fork+0x1f/0x40
[  131.500786]  [] ? kthread_worker_fn+0x170/0x170
[  131.500786] Code: 25 89 4b 28 e9 7c ff ff ff 44 3b 63 28 75 1c c7
43 30 00 00 00 00 c7 43 28 00 00 00 00 b9 01 00 00 00 31 c0 eb d2 8d
48 02 eb d6 <0f> 0b 66 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55
41 54
[  131.500786] RIP  []
btrfs_check_repairable+0xfc/0x100 [btrfs]
[  131.500786]  RSP 
[  132.075621] [ cut here ]
[  132.078829] kernel BUG at fs/btrfs/extent_io.c:2315!
[  132.086719] ---[ end trace 6a5a7b58c27bde1c ]---
[  132.096386] systemd-journald[230]: Compressed data object 806 ->
744 using LZ4
[  132.086711] invalid opcode:  [#2] PREEMPT SMP
[  132.086711] Modules linked in: rpcsec_gss_krb5 auth_rpcgss
oid_registry nfsv4 dns_resolver nfs lockd grace sunrpc fscache kvm_amd
mousedev kvm irqbypass crct10dif_pclmul crc32_pclmul nls_iso8859_1
ghash_clmulni_intel nls_cp437 aesni_intel vfat snd_intel8x0 input_leds
aes_x86_64 fat ppdev evdev snd_ac97_codec led_class ac97_bus lrw
snd_pcm snd_timer gf128mul glue_helper ablk_helper acpi_cpufreq cryptd
mac_hid snd psmouse pcspkr tpm_tis intel_agp parport_pc soundcore
intel_gtt qemu_fw_cfg i2c_piix4 tpm_tis_core parport tpm button
sch_fq_codel ip_tables x_tables btrfs xor raid6_pq virtio_gpu
drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm
sd_mod virtio_balloon virtio_console virtio_blk virtio_net ata_generic
drm pata_acpi xhci_pci xhci_hcd ahci serio_raw virtio_pci atkbd libps2
libahci usbcore ata_piix virtio_ring libata scsi_mod usb_common virtio
crc32c_intel i8042 serio floppy
[  132.086711] CPU: 5 PID: 142 Comm: kworker/u12:4 Tainted: G  D
  4.8.2-1-ARCH #1
[  132.086711] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 0.0.0 02/06/2015
[  132.086711] Workqueue: btrfs-endio btrfs_endio_helper [btrfs]
[  132.086711] task: 880233f1aac0 task.stack: 880233f24000
[  132.086711] RIP: 0010:[]  []

Re: bio linked list corruption.

2016-10-19 Thread Linus Torvalds
On Wed, Oct 19, 2016 at 10:09 AM, Philipp Hahn  wrote:
>
> Nearly a month ago I also reported a "list_add corruption", but with 4.1.6:
> 
>
> That server runs Samba4, which is also a heavy user of xattr.

That one looks very different. In fact, the list that got corrupted
for you has since been changed to a hlist (which is *similar* to our
doubly-linked list, but has a smaller head and does not allow adding
to the end of the list).

Also, the "should be" and "was" values are very close, and switched:

should be 81ab3ca8, but was 81ab3cc8
should be 81ab3cc8, but was 81ab3ca8

so it actually looks like it was the same data structure. In
particular, it looks like enqueue_timer() ended up racing on adding an
entry to one index in the "base->vectors[]" array, while hitting an
entry that was pointing to another index near-by.

So I don't think it's related. Yours looks like some subtle timer base
race. It smells like a locking problem with timers. I'm not seeing
what it might be, but it *might* have been fixed by doing the
TIMER_MIGRATING bit right in add_timer_on() (commit 22b886dd1018).

Adding some timer people just in case, but I don't think your 4.1
report is related.

Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: bio linked list corruption.

2016-10-19 Thread Philipp Hahn
Hello,

Am 19.10.2016 um 01:42 schrieb Chris Mason:
> On Tue, Oct 18, 2016 at 04:39:22PM -0700, Linus Torvalds wrote:
>> On Tue, Oct 18, 2016 at 4:31 PM, Chris Mason  wrote:
>>>
>>> Jens, not sure if you saw the whole thread.  This has triggered bad page
>>> state errors, and also corrupted a btrfs list.  It hurts me to say,
>>> but it
>>> might not actually be your fault.
>>
>> Where is that thread, and what is the "this" that triggers problems?
>>
>> Looking at the "->mq_list" users, I'm not seeing any changes there in
>> the last year or so. So I don't think it's the list itself.
> 
> Seems to be the whole thing:
> 
> http://www.gossamer-threads.com/lists/linux/kernel/2545792
> 
> My guess is xattr, but I don't have a good reason for that.

Nearly a month ago I also reported a "list_add corruption", but with 4.1.6:


That server runs Samba4, which is also a heavy user of xattr.

Might be that it is related.

Philipp Hahn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] btrfs-progs: Add a command to show bg info

2016-10-19 Thread Divya Indi



On 10/17/2016 10:24 PM, Roman Mamedov wrote:

On Tue, 18 Oct 2016 09:39:32 +0800
Qu Wenruo  wrote:


  static const char * const cmd_inspect_inode_resolve_usage[] = {
"btrfs inspect-internal inode-resolve [-v]  ",
"Get file system paths for the given inode",
@@ -702,6 +814,8 @@ const struct cmd_group inspect_cmd_group = {
0 },
{ "min-dev-size", cmd_inspect_min_dev_size,
cmd_inspect_min_dev_size_usage, NULL, 0 },
+   { "bg_analysis", cmd_inspect_bg_analysis,
+   cmd_inspect_bg_analysis_usage, NULL, 0 },

Just naming preference, IMHO show-block-groups or dump-block-groups
seems better for me.

And in any case please don't mix separation by "-" and "_" in the same command
string. In btrfs tool the convention is to separate words in subcommand names
using "-".


Noted, thanks! Will update the patch to correct this.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs partition fails to mount - kernel BUG at ../fs/btrfs/extent-tree.c:1872

2016-10-19 Thread Dāvis Mosāns
Happens for me too with 4.8.2

[ 1002.265296] BTRFS info (device vdb): disk space caching is enabled
[ 1002.265299] BTRFS info (device vdb): has skinny extents
[ 1002.952569] BTRFS info (device vdb): continuing balance
[ 1003.079696] BTRFS info (device vdb): relocating block group
655951396864 flags 17
[ 1004.028703] BTRFS info (device vdb): found 105 extents
[ 1004.956861] [ cut here ]
[ 1004.956910] kernel BUG at fs/btrfs/extent-tree.c:1868!
[ 1004.956946] invalid opcode:  [#1] PREEMPT SMP
[ 1004.956978] Modules linked in: mousedev nls_iso8859_1 nls_cp437
vfat fat kvm_amd kvm irqbypass crct10dif_pclmul crc32_pclmul
ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul input_leds
snd_intel8x0 evdev glue_helper led_class ablk_helper ppdev cryptd
mac_hid snd_ac97_codec ac97_bus psmouse snd_pcm acpi_cpufreq snd_timer
parport_pc tpm_tis tpm_tis_core snd intel_agp tpm pcspkr button
qemu_fw_cfg parport intel_gtt i2c_piix4 soundcore sch_fq_codel
ip_tables x_tables btrfs xor sd_modraid6_pq virtio_gpu drm_kms_helper
syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_net
virtio_console virtio_blk virtio_balloon xhci_pci xhci_hcd serio_raw
ahci atkbd usbcore libps2 libahci i8042 ata_generic virtio_pci
pata_acpi ata_piix virtio_ring usb_common floppy serio libata
crc32c_intel virtio scsi_mod
[ 1004.957638] CPU: 5 PID: 143 Comm: kworker/u12:6 Tainted: GW
  4.8.2-1-ARCH #1
[ 1004.957690] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 0.0.0 02/06/2015
[ 1004.957767] Workqueue: btrfs-extent-refs btrfs_extent_refs_helper [btrfs]
[ 1004.957812] task: 880233e40e40 task.stack: 880233e48000
[ 1004.957851] RIP: 0010:[]  []
update_inline_extent_backref+0x2ab/0x2b0 [btrfs]
[ 1004.957930] RSP: 0018:880233e4bac8  EFLAGS: 00010293
[ 1004.957965] RAX: 0005 RBX: 25f0 RCX: 0002
[ 1004.958010] RDX: 8800 RSI: 2621 RDI: 
[ 1004.958055] RBP: 880233e4bb18 R08: 4000 R09: 
[ 1004.958100] R10: 2609 R11: 0002 R12: 88022acee348
[ 1004.958145] R13: fffb R14: 880233638540 R15: 00b2
[ 1004.958194] FS:  () GS:88023fd4()
knlGS:
[ 1004.958246] CS:  0010 DS:  ES:  CR0: 80050033
[ 1004.958283] CR2: 7f42b0906120 CR3: 000233db1000 CR4: 000406e0
[ 1004.958331] Stack:
[ 1004.958348]  2609 880233e4bbbc 88022d3b9000
fffb
[ 1004.958406]  2608 88022d3b9000 
88023295ee60
[ 1004.958463]  880233638540 007f 880233e4bb30
a027ac8f
[ 1004.958519] Call Trace:
[ 1004.958553]  [] remove_extent_backref+0x3f/0x80 [btrfs]
[ 1004.958610]  []
__btrfs_free_extent.isra.33+0x6c8/0xca0 [btrfs]
[ 1004.958674]  []
__btrfs_run_delayed_refs+0x5b0/0x12e0 [btrfs]
[ 1004.958727]  [] ? kmem_cache_free+0x1e0/0x210
[ 1004.958769]  [] ? cpuacct_charge+0x86/0xa0
[ 1004.958821]  [] btrfs_run_delayed_refs+0xa1/0x2a0 [btrfs]
[ 1004.958879]  [] delayed_ref_async_start+0x94/0xb0 [btrfs]
[ 1004.958939]  [] btrfs_scrubparity_helper+0x7d/0x340 [btrfs]
[ 1004.959007]  [] btrfs_extent_refs_helper+0xe/0x10 [btrfs]
[ 1004.959059]  [] process_one_work+0x1e5/0x470
[ 1004.959104]  [] worker_thread+0x48/0x4e0
[ 1004.959148]  [] ? process_one_work+0x470/0x470
[ 1004.959194]  [] kthread+0xd8/0xf0
[ 1004.959233]  [] ? __switch_to+0x2d2/0x630
[ 1004.960132]  [] ret_from_fork+0x1f/0x40
[ 1004.960132]  [] ? kthread_worker_fn+0x170/0x170
[ 1004.960132] Code: b0 e9 6b ff ff ff 41 bf 0d 00 00 00 41 bd 0d 00
00 00 e9 8f fe ff ff 0f 0b 0f 0b 44 89 e8 f7 d8 48 98 48 39 c1 0f 83
4f ff ff ff <0f> 0b 0f 1f 00 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41
55 41
[ 1004.960132] RIP  []
update_inline_extent_backref+0x2ab/0x2b0 [btrfs]
[ 1004.960132]  RSP 
[ 1004.977825] ---[ end trace 8696bc20b65a0f77 ]---
[ 1004.979754] note: kworker/u12:6[143] exited with preempt_count 2



Label: 'RAID1'  uuid: 247e6249-6de1-45cb-9dd0-fa8a654234bf
Total devices 2 FS bytes used 360.15GiB
devid1 size 2.73TiB used 363.03GiB path /dev/vdb
devid2 size 2.73TiB used 363.03GiB path /dev/vda

Overall:
Device size:   5.46TiB
Device allocated:726.06GiB
Device unallocated:4.75TiB
Device missing:  0.00B
Used:720.30GiB
Free (estimated):  2.38TiB  (min: 2.38TiB)
Data ratio:   2.00
Metadata ratio:   2.00
Global reserve:  512.00MiB  (used: 0.00B)

Data,RAID1: Size:360.00GiB, Used:358.52GiB
   /dev/vda  360.00GiB
   /dev/vdb  360.00GiB

Metadata,RAID1: Size:3.00GiB, Used:1.63GiB
   /dev/vda3.00GiB
   /dev/vdb3.00GiB

System,RAID1: Size:32.00MiB, Used:80.00KiB
   /dev/vda   32.00MiB
   /dev/vdb   32.00MiB


Re: Cross subvolume "cp --reflink"?

2016-10-19 Thread Andrei Borzenkov
19.10.2016 20:04, Hugo Mills пишет:
> On Wed, Oct 19, 2016 at 07:52:14PM +0300, Andrei Borzenkov wrote:
>> I get "Failed to clone: Invalid cross-device link". Is it expected?
>> Basically this is (on openSUSE TW which has root on subvolume)
>>
>> mount -o subvol=/ /dev/vda1 /mnt
>> btrfs sub create /mnt/var/cache
>> cp -a --reflink=always /var/cache/* /mnt/var/cache
>>
>> Kernel 4.7.5-1-default.
> 
>Yes, you're trying to copy across a mountpoint boundary. This isn't
> going to work. You'll need to mount subvolid=0 on /mnt, and then use
> the source and target of the cp *both* from within the /mnt subtree.
> 
>Reflink operations (including cp --reflink=always, and the
> reflink-aware mv) work across subvol boundaries, but not mount
> boundaries.
> 
>Hugo.
> 
Yes, figured that much too. Thank you!



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 3/3] btrfs-progs: Add command to check if balance op is req

2016-10-19 Thread Divya Indi

On 10/17/2016 06:42 PM, Qu Wenruo wrote:



At 10/18/2016 08:35 AM, Divya Indi wrote:

Add new subcommand to btrfs inspect-internal

btrfs inspect-internal balance_check 
Checks whether 'btrfs balance' can help creating more space (Only
considers data block groups).


I don't think it's good to add a new subcommand just for that.

Why not output such a relocation suggestion from your previous bg-analyze
subcommand?

(It's better to make it a parameter to trigger such output)

Thanks,
Qu

Or maybe as an option to btrfs balance start?
Eg: btrfs balance start --check-only 


Signed-off-by: Divya Indi 
Reviewed-by: Ashish Samant 
Reviewed-by: Liu Bo 
---
 cmds-inspect.c |  147 


 1 files changed, 147 insertions(+), 0 deletions(-)

diff --git a/cmds-inspect.c b/cmds-inspect.c
index 0e2f15a..5baaa49 100644
--- a/cmds-inspect.c
+++ b/cmds-inspect.c
@@ -267,6 +267,151 @@ static const char * const 
cmd_inspect_inode_resolve_usage[] = {

 NULL
 };

+static const char * const cmd_inspect_balance_check_usage[] = {
+"btrfs inspect-internal balance_check ",
+"To check whether 'btrfs balance' can help creating more space",
+"",
+"",
+NULL
+};
+
+static int cmd_inspect_balance_check(int argc, char **argv)
+{
+struct btrfs_ioctl_search_args args;
+struct btrfs_ioctl_search_args bg_args;
+struct btrfs_ioctl_search_key *sk;
+struct btrfs_ioctl_search_key *bg_sk;
+struct btrfs_ioctl_search_header *header;
+struct btrfs_ioctl_search_header *bg_header;
+struct btrfs_block_group_item *bg;
+struct btrfs_chunk *chunk;
+unsigned long off = 0;
+unsigned long bg_off = 0;
+DIR *dirstream = NULL;
+int fd;
+int i;
+u64 total_free = 0;
+u64 min_used = (u64)-1;
+u64 free_of_min_used = 0;
+u64 bg_of_min_used = 0;
+u64 flags;
+u64 used;
+int ret = 0;
+int nr_data_bgs = 0;
+
+if (check_argc_exact(argc, 2))
+usage(cmd_inspect_balance_check_usage);
+
+fd = btrfs_open_dir(argv[optind], &dirstream, 1);
+if (fd < 0)
+return 1;
+
+memset(&args, 0, sizeof(args));
+sk = &args.key;
+sk->min_offset = sk->min_transid = 0;
+sk->max_offset = sk->max_transid = (u64)-1;
+
+printf("%20s%20s%20s\n", "Start", "Len", "Used");
+while (1) {
+ret = get_chunks(fd, &args);
+if (ret < 0)
+goto out;
+
+/*
+ * it should not happen.
+ */
+if (sk->nr_items == 0)
+break;
+
+off = 0;
+memset(&bg_args, 0, sizeof(bg_args));
+bg_sk = &bg_args.key;
+
+/* For every chunk, look up 1 exact match for block group in
+ * the extent tree. */
+bg_sk->tree_id = BTRFS_EXTENT_TREE_OBJECTID;
+bg_sk->min_type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+bg_sk->max_type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+bg_sk->min_transid =  0;
+bg_sk->max_transid = (u64)-1;
+
+for (i = 0; i < sk->nr_items; i++) {
+header = (struct btrfs_ioctl_search_header *)(args.buf
+  + off);
+off += sizeof(*header);
+if (header->type == BTRFS_CHUNK_ITEM_KEY) {
+chunk = (struct btrfs_chunk *)
+(args.buf + off);
+ret = get_bg_info(fd, &bg_args, header->offset,
+  chunk->length);
+if (ret < 0)
+goto out;
+
+/*
+ * it should not happen.
+ */
+if (bg_sk->nr_items == 0)
+continue;
+
+bg_off = 0;
+bg_header = (struct btrfs_ioctl_search_header *)
+(bg_args.buf + bg_off);
+bg_off += sizeof(*bg_header);
+bg = (struct btrfs_block_group_item *)
+ (bg_args.buf + bg_off);
+
+flags = btrfs_block_group_flags(bg);
+if (flags & BTRFS_BLOCK_GROUP_DATA) {
+used = btrfs_block_group_used(bg);
+nr_data_bgs++;
+printf("%20llu%20s%20s\n",
+bg_header->objectid,
+pretty_size(bg_header->offset),
+pretty_size(used));
+total_free += bg_header->offset - used;
+if (min_used >= used) {
+min_used = used;
+free_of_min_used =
+bg_header->offset - used;
+bg_of_min_used =
+bg_header->objectid;
+}
+}
+}
+
+off += header->len;
+sk->min_offset = header->offset + header->len;
+}
+sk->nr_items = 4096;
+
+}
+
+if (nr_data_bgs <= 1) {
+printf("Data block groups in fs = %d, no need 

Re: Cross subvolume "cp --reflink"?

2016-10-19 Thread Hugo Mills
On Wed, Oct 19, 2016 at 07:52:14PM +0300, Andrei Borzenkov wrote:
> I get "Failed to clone: Invalid cross-device link". Is it expected?
> Basically this is (on openSUSE TW which has root on subvolume)
> 
> mount -o subvol=/ /dev/vda1 /mnt
> btrfs sub create /mnt/var/cache
> cp -a --reflink=always /var/cache/* /mnt/var/cache
> 
> Kernel 4.7.5-1-default.

   Yes, you're trying to copy across a mountpoint boundary. This isn't
going to work. You'll need to mount subvolid=0 on /mnt, and then use
the source and target of the cp *both* from within the /mnt subtree.

   Reflink operations (including cp --reflink=always, and the
reflink-aware mv) work across subvol boundaries, but not mount
boundaries.

   Hugo.

-- 
Hugo Mills | For months now, we have been making triumphant
hugo@... carfax.org.uk | retreats before a demoralised enemy who is advancing
http://carfax.org.uk/  | in utter disorder.
PGP: E2AB1DE4  |  Eric Frank Russell, Wasp


signature.asc
Description: Digital signature


Cross subvolume "cp --reflink"?

2016-10-19 Thread Andrei Borzenkov
I get "Failed to clone: Invalid cross-device link". Is it expected?
Basically this is (on openSUSE TW which has root on subvolume)

mount -o subvol=/ /dev/vda1 /mnt
btrfs sub create /mnt/var/cache
cp -a --reflink=always /var/cache/* /mnt/var/cache

Kernel 4.7.5-1-default.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: pass correct args to btrfs_async_run_delayed_refs()

2016-10-19 Thread David Sterba
On Wed, Oct 19, 2016 at 12:57:19PM +0800, Wang Xiaoguang wrote:
> hi,
> 
> On 10/18/2016 06:32 PM, Holger Hoffstätte wrote:
> > On Tue, 18 Oct 2016 15:56:13 +0800, Wang Xiaoguang wrote:
> >
> >> In btrfs_truncate_inode_items()->btrfs_async_run_delayed_refs(), we
> >> swap the arg2 and arg3 wrongly, fix this.
> >>
> >> Signed-off-by: Wang Xiaoguang 
> >> ---
> >>   fs/btrfs/inode.c | 4 ++--
> >>   1 file changed, 2 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> >> index 2b790bd..2f1372b 100644
> >> --- a/fs/btrfs/inode.c
> >> +++ b/fs/btrfs/inode.c
> >> @@ -4605,8 +4605,8 @@ int btrfs_truncate_inode_items(struct 
> >> btrfs_trans_handle *trans,
> >>BUG_ON(ret);
> >>if (btrfs_should_throttle_delayed_refs(trans, root))
> >>btrfs_async_run_delayed_refs(root,
> >> -   trans->transid,
> >> -  trans->delayed_ref_updates * 2, 0);
> >> +  trans->delayed_ref_updates * 2,
> >> +  trans->transid, 0);
> >>if (be_nice) {
> >>if (truncate_space_check(trans, root,
> >> extent_num_bytes)) {
> > Reviewed-by: Holger Hoffstätte 
> >
> > Passing the wrong transid..why did this ever work?
> Indeed, this bug only affects asynchronous delayed ref handling when we
> truncate inodes.  In delayed_ref_async_start(), there is the following
> code:
> 
>  trans = btrfs_join_transaction(async->root);
>  if (trans->transid > async->transid)
>  goto end;
>  ret = btrfs_run_delayed_refs(trans, async->root, async->count);
> 
>  From this code, we can see that the bug only influences whether we handle
> delayed refs, or the number of delayed refs to handle. This may impact
> performance, but will not result in missing delayed refs: all delayed
> refs will be handled in btrfs_commit_transaction().

You should put this kind of information to the changelog itself, now
added.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] Btrfs: remove some no-op casts

2016-10-19 Thread David Sterba
On Wed, Oct 12, 2016 at 11:33:21AM +0300, Dan Carpenter wrote:
> We cast 0 to a u8 but then because of type promotion, it's immediately
> cast back to int before we do a bitwise negate.  The cast doesn't
> matter in this case, the code works as intended.  It causes a static
> checker warning though so let's remove it.
> 
> Signed-off-by: Dan Carpenter 

Applies to the free space tree fixes that are now in master.

Reviewed-by: David Sterba 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] btrfs: fix false enospc for compression

2016-10-19 Thread David Sterba
On Mon, Oct 17, 2016 at 05:01:46PM +0800, Wang Xiaoguang wrote:
> > [..]
> >>   int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
> >> -struct extent_state **cached_state);
> >> +struct extent_state **cached_state, int flag);
> >>   int btrfs_set_extent_defrag(struct inode *inode, u64 start, u64 end,
> >> -  struct extent_state **cached_state);
> >> +  struct extent_state **cached_state, int flag);
> > [..]
> >>   int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
> >>  struct page **pages, size_t num_pages,
> >>  loff_t pos, size_t write_bytes,
> >> -struct extent_state **cached);
> >> +struct extent_state **cached, int flag);
> > Instead of adding "int flag" why not use the already defined
> > btrfs_metadata_reserve_type enum? I know it's just an int at the end of
> > the day, but the dedupe support already added another "int dedupe" argument
> > and it's probably easy to cause confusion.
> > Maybe later it would be beneficial to consolidate the flags into a 
> > consistent
> > set of enum values to prevent more "int flag" inflation and better declare 
> > the
> > intent of the extent state change. Not sure if that makes sense.
> Yes, agree.
> I'll rebase them later, thanks.

Would be great. I won't manually merge the patch now as it's not a
conflict against the current state, btrfs_set_extent_delalloc has the
extra parameter already. Please consolidate them before this patch is
supposed to be merged. Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Warning in __btrfs_free_extent due to -ENOENT backrefs

2016-10-19 Thread Nikolay Borisov
Hello, 

So I got the following warnings on one of my servers:

[361464.059606] [ cut here ]
[361464.059831] WARNING: CPU: 6 PID: 7637 at fs/btrfs/extent-tree.c:6543 
__btrfs_free_extent.isra.66+0x65c/0xcb0 [btrfs]()
[361464.064134] CPU: 6 PID: 7637 Comm: btrfs-cleaner Tainted: PW  O
4.4.22-clouder1 #12
[361464.064430] Hardware name: Supermicro 
X9DRi-LN4+/X9DR3-LN4+/X9DRi-LN4+/X9DR3-LN4+, BIOS 3.2 03/04/2015
[361464.064727]   880486e03b00 812f4999 

[361464.065213]  a09e6fda 880486e03b38 81052bd6 
fffe
[361464.065697]  02e77000 8805d4a30750 88000513e000 

[361464.066178] Call Trace:
[361464.066355]  [] dump_stack+0x67/0x9e
[361464.066531]  [] warn_slowpath_common+0x86/0xc0
[361464.066705]  [] warn_slowpath_null+0x1a/0x20
[361464.066890]  [] __btrfs_free_extent.isra.66+0x65c/0xcb0 
[btrfs]
[361464.067194]  [] ? btrfs_set_path_blocking+0x3f/0x70 
[btrfs]
[361464.067500]  [] __btrfs_run_delayed_refs+0x898/0x12c0 
[btrfs]
[361464.067810]  [] ? btrfs_add_delayed_extent_op+0x98/0xc0 
[btrfs]
[361464.068116]  [] ? btrfs_set_disk_extent_flags+0x85/0xc0 
[btrfs]
[361464.068423]  [] btrfs_run_delayed_refs+0x82/0x2f0 [btrfs]
[361464.068616]  [] ? free_extent_buffer+0x4b/0x90 [btrfs]
[361464.068807]  [] btrfs_should_end_transaction+0x59/0x60 
[btrfs]
[361464.069112]  [] btrfs_drop_snapshot+0x436/0x800 [btrfs]
[361464.072161]  [] 
btrfs_clean_one_deleted_snapshot+0xb3/0xf0 [btrfs]
[361464.072468]  [] cleaner_kthread+0x1e2/0x280 [btrfs]
[361464.072655]  [] ? check_leaf+0x340/0x340 [btrfs]
[361464.072833]  [] kthread+0xef/0x110
[361464.073009]  [] ? kthread_park+0x60/0x60
[361464.073188]  [] ret_from_fork+0x3f/0x70
[361464.073364]  [] ? kthread_park+0x60/0x60
[361464.073546] ---[ end trace 03d80b3e257b8bae ]---
[361464.073726] BTRFS info (device loop168): leaf 540753920 total ptrs 101 free 
space 42
[361464.074021] item 0 key (48619520 168 4096) itemoff 3944 itemsize 51
[361464.074198] extent refs 1 gen 684 flags 2
[361464.074370] tree block key (1690 1 0) level 0
[361464.074543] tree block backref root 354
[361464.074722] item 1 key (48623616 168 4096) itemoff 3893 itemsize 51
[361464.074898] extent refs 1 gen 671 flags 2
[361464.075070] tree block key (6027 1 0) level 1
[361464.075243] tree block backref root 351
[361464.075415] item 2 key (48627712 168 4096) itemoff 3842 itemsize 51
[361464.075591] extent refs 1 gen 671 flags 2
[361464.075767] tree block key (6066 1 0) level 0
[361464.075942] tree block backref root 351
[361464.076114] item 3 key (48631808 168 4096) itemoff 3791 itemsize 51
[361464.076294] extent refs 1 gen 671 flags 2
[361464.076466] tree block key (6071 96 65) level 0
[361464.076647] tree block backref root 351
[361464.076826] item 4 key (48635904 168 4096) itemoff 3740 itemsize 51
[361464.077003] extent refs 1 gen 671 flags 2
[361464.077175] tree block key (6070 96 4) level 0
[361464.077348] tree block backref root 351
[361464.077519] item 5 key (4864 168 4096) itemoff 3689 itemsize 51
[361464.077694] extent refs 1 gen 671 flags 2
[361464.077873] tree block key (6073 84 3593620995) level 0
[361464.078048] tree block backref root 351
[361464.078220] item 6 key (48644096 168 4096) itemoff 3638 itemsize 51
[361464.078397] extent refs 1 gen 671 flags 2
[361464.078568] tree block key (6076 96 2) level 0
[361464.078747] tree block backref root 351
[361464.078920] item 7 key (48648192 168 4096) itemoff 3587 itemsize 51
[361464.079097] extent refs 1 gen 671 flags 2
[361464.079269] tree block key (6082 84 4159721953) level 0
[361464.079443] tree block backref root 351
[361464.079615] item 8 key (48652288 168 4096) itemoff 3536 itemsize 51
[361464.079794] extent refs 1 gen 671 flags 2
[361464.079965] tree block key (6086 96 2) level 0
[361464.080134] tree block backref root 351
[361464.080303] item 9 key (48656384 168 4096) itemoff 3485 itemsize 51
[361464.080476] extent refs 1 gen 671 flags 2
[361464.080645] tree block key (6094 108 0) level 0
[361464.080821] tree block backref root 351
[361464.080991] item 10 key (48660480 168 4096) itemoff 3434 itemsize 51
[361464.081165] extent refs 1 gen 671 flags 2
[361464.081334] tree block key (6099 12 6071) level 0
[361464.081506] tree block backref root 351
[361464.081678] item 11 key (48664576 168 4096) 

Re: [PATCH 1/2] btrfs-progs: fsck: Add support to clear v1 free space cache.

2016-10-19 Thread David Sterba
On Thu, Oct 13, 2016 at 05:22:26PM +0800, Qu Wenruo wrote:
> Kernel clear_cache mount option will only rebuild free space cache if
> used space of that chunk has changed.
> 
> So it won't ensure any corrupted free space cache gets cleared.
> 
> So add a new option "--clear-space-cache v1|v2" to btrfsck, to
> completely wipe out free space cache.
> So kernel won't complain again.
> 
> Reported-by: Ivan P 
> Signed-off-by: Qu Wenruo 
> ---
>  Documentation/btrfs-check.asciidoc |   9 +++
>  cmds-check.c   |  63 ++-
>  free-space-cache.c | 124 
> +
>  free-space-cache.h |   2 +
>  4 files changed, 197 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/btrfs-check.asciidoc 
> b/Documentation/btrfs-check.asciidoc
> index a32e1c7..ef1e464 100644
> --- a/Documentation/btrfs-check.asciidoc
> +++ b/Documentation/btrfs-check.asciidoc
> @@ -78,6 +78,15 @@ respective superblock offset is within the device size
>  This can be used to use a different starting point if some of the primary
>  superblock is damaged.
>  
> +--clear-space-cache v1|v2::
> +completely wipe out all free space cache.
> +Only v1(file based) free space cache is supported yet.
> ++
> +NOTE: Kernel mount option 'clear_cache' is only designed to rebuild free 
> space cache
> +which is modified during the lifetime of that mount option.
> +It doesn't rebuild all free space cache, nor clear them out.
> +
> +
>  DANGEROUS OPTIONS
>  -
>  
> diff --git a/cmds-check.c b/cmds-check.c
> index 670ccd1..f62fc62 100644
> --- a/cmds-check.c
> +++ b/cmds-check.c
> @@ -11206,6 +11206,36 @@ out:
>   return bad_roots;
>  }
>  
> +static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
> +{
> + struct btrfs_trans_handle *trans;
> + struct btrfs_block_group_cache *bg_cache;
> + u64 current = 0;
> + int ret = 0;
> +
> + /* Clear all free space cache inodes and its extent data */
> + while (1) {
> + bg_cache = btrfs_lookup_first_block_group(fs_info, current);
> + if (!bg_cache)
> + break;
> + ret = btrfs_clear_free_space_cache(fs_info, bg_cache);

The function can fail for a lot of reasons, what would be the filesystem
state when we exit here? Some of the inodes could be cleared completely,
the last one partially.  The function copes with a missing inode item
but I don't know how many other intermediate states could be left.

> + if (ret < 0)
> + return ret;
> + current = bg_cache->key.objectid + bg_cache->key.offset;
> + }
> +
> + /* Don't forget to set cache_generation to -1 */
> + trans = btrfs_start_transaction(fs_info->tree_root, 0);
> + if (IS_ERR(trans)) {
> + error("failed to update super block cache generation");
> + return PTR_ERR(trans);
> + }
> + btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
> + btrfs_commit_transaction(trans, fs_info->tree_root);
> +
> + return ret;
> +}
> +
>  const char * const cmd_check_usage[] = {
>   "btrfs check [options] ",
>   "Check structural integrity of a filesystem (unmounted).",
> @@ -11233,6 +11263,9 @@ const char * const cmd_check_usage[] = {
>   "-r|--tree-root  use the given bytenr for the tree root",
>   "--chunk-rootuse the given bytenr for the chunk tree 
> root",
>   "-p|--progress   indicate progress",
> + "--clear-space-cache v1|v2   clear space cache for v1(file based) or ",
> + "v2(tree based).",
> + "Only support v1 yet",
>   NULL
>  };
>  
> @@ -11250,6 +11283,7 @@ int cmd_check(int argc, char **argv)
>   u64 num;
>   int init_csum_tree = 0;
>   int readonly = 0;
> + int clear_space_cache = 0;
>   int qgroup_report = 0;
>   int qgroups_repaired = 0;
>   unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
> @@ -11259,7 +11293,7 @@ int cmd_check(int argc, char **argv)
>   enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
>   GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
>   GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
> - GETOPT_VAL_MODE };
> + GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
>   static const struct option long_options[] = {
>   { "super", required_argument, NULL, 's' },
>   { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
> @@ -11279,6 +11313,8 @@ int cmd_check(int argc, char **argv)
>   { "progress", no_argument, NULL, 'p' },
>   { "mode", required_argument, NULL,
>   GETOPT_VAL_MODE },
> + { "clear-space-cache", required_argument, NULL,
> +  

Re: Monitoring Btrfs

2016-10-19 Thread Austin S. Hemmelgarn

On 2016-10-18 17:36, Anand Jain wrote:




I would like to monitor my btrfs-filesystem for missing drives.




This is actually correct behavior, the filesystem reports that it
should
have 6 devices, which is how it knows a device is missing.




 Missing - means missing at the time of mount. So how are you planning
to monitor a disk which is failed while in production ?



No, in `btrfs fi show` it means that it can't find the device.


 'btrfs fi show' is misleading compared to 'btrfs fi show -m'.
 -m gives the btrfs kernel's perspective of the devices; as of now
 there is no code in the kernel which changes the device status
 while it's mounted (except for readonly, which is irrelevant in
 raid1 with 1 disk failed).
Actually, that's exactly how I would expect each of them to behave.  We 
need some way to get both the state the kernel thinks the FS is in, and 
the state it's actually in (according to the tools, not the kernel), and 
'-m' reporting kernel state while no '-m' reports actual state is 
exactly what I would expect in this case.


That leads also to another way I hadn't thought of to monitor a 
filesystem.  The output of 'fi show' with and without '-m' should match 
if the filesystem was healthy when mounted and is still healthy, if they 
don't, then something is wrong.



1. Filesystem flags.  These will change when the filesystem goes
degraded,


  Which flag is in question here?
I should clarify here, I mean the mount options, I'm just used to the 
monit terminology (which was not well picked in this case).  The big one 
to watch is the read-only flag, as BTRFS will force a filesystem 
read-only (which updates the mount options).  Any change to the mount 
options though without manual intervention is generally a sign that 
_something_ is wrong.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Monitoring Btrfs

2016-10-19 Thread Austin S. Hemmelgarn

On 2016-10-19 09:06, Anand Jain wrote:



On 10/19/16 19:15, Austin S. Hemmelgarn wrote:

On 2016-10-18 17:36, Anand Jain wrote:




I would like to monitor my btrfs-filesystem for missing drives.




This is actually correct behavior, the filesystem reports that it
should
have 6 devices, which is how it knows a device is missing.




 Missing - means missing at the time of mount. So how are you planning
to monitor a disk which has failed while in production?



No, in `btrfs fi show` it means that it can't find the device.


 'btrfs fi show' is misleading compared to 'btrfs fi show -m'.
 -m tells the btrfs-kernel perspective of the devices; as of now
 there is no code in the kernel which changes the device status
 while it's mounted (except for readonly, which is irrelevant in
 raid1 with 1 disk failed).



Actually, that's exactly how I would expect each of them to behave.  We
need some way to get both the state the kernel thinks the FS is in, and
the state it's actually in (according to the tools, not the kernel), and
'-m' reporting kernel state while no '-m' reports actual state is
exactly what I would expect in this case.




That leads also to another way I hadn't thought of to monitor a
filesystem.  The output of 'fi show' with and without '-m' should match
if the filesystem was healthy when mounted and is still healthy, if they
don't, then something is wrong.




1. Filesystem flags.  These will change when the filesystem goes
degraded,


  Which flag is in question here?

I should clarify here, I mean the mount options, I'm just used to the
monit terminology (which was not well picked in this case).  The big one
to watch is the read-only flag, as BTRFS will force a filesystem
read-only (which updates the mount options).  Any change to the mount
options though without manual intervention is generally a sign that
_something_ is wrong.



 btrfs-progs shouldn't add its own intelligence in determining the
 device state, it should be a transparent tool to report status from
 the btrfs-kernel. So I am opposed to patches such as

commit 206efb60cbe3049e0d44c6da3c1909aeee18f813
btrfs-progs: Add missing devices check for mounted btrfs.

 There are many ways a device can fail/recover in a SAN environment;
 this device-state-managing intelligence should be in one place, and
 that place is the kernel. The volume manager part of the code in the
 kernel is incomplete.

I don't agree that the management should be completely unified or that 
the tools should just report kernel state.  The tools have to have some 
way to check device state for unmounted filesystems because they have to 
operate on unmounted filesystems, and because until the kernel gets 
smart enough to actually handle device state properly, some method is 
needed to check the actual state of the devices.  Even once the kernel 
is smart enough, it's still helpful to see without mounting a filesystem 
whether or not all the devices are there, and if we ever switch to a 
real mount helper (which I am in favor of for multiple reasons), we'll 
need device state checking in userspace for that too.


Take a look at LVM.  The separation of responsibilities there is 
ideally what we should be looking at long term for BTRFS.  The userspace 
components tell the kernel what to do, and list both kernel state _and_ 
physical state in a readable manner.  The kernel tracks limited parts of 
the state (only for active LV's, so the equivalent of mounted 
filesystems, and even then only what it needs to track (Is this RAID 
volume in sync?  Is that snapshot or thin storage pool getting close to 
full?)), and sends notifications to a userspace component which then 
acts on those conditions (possibly then telling the kernel what to do in 
response to them).  On top of that, the userspace components don't 
require a kernel which supports them for any off-line operations, and 
the kernel works fine with older userspace.  Both userspace and the 
kernel handle missing devices (userspace tools report them, the kernel 
refuses to activate LV's that require them).

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] btrfs-progs: fsck-tests: Check if clear space cache works

2016-10-19 Thread David Sterba
On Thu, Oct 13, 2016 at 05:22:27PM +0800, Qu Wenruo wrote:
> +check_prereq mkfs.btrfs
> +
> +setup_root_helper
> +prepare_test_dev 1G
> +
> +tmp=$(mktemp)
> +run_check $SUDO_HELPER $TOP/mkfs.btrfs -f $TEST_DEV
> +run_check_mount_test_dev
> +
> +# Create files that takes at least 3 data chunks, while 
> +# can still be removed to create free space inside one chunk.
> +
> +for i in $(seq 0 6); do
> + run_check $SUDO_HELPER dd if=/dev/zero of=$TEST_MNT/file_${i} bs=1M \
> + count=64 > /dev/null 2>&1
> +done
> +sync
> +
> +# Remove file 1 3 5 to create holes
> +for i in $(seq 1 2 6); do

Use of seq in this case is questionable :)

> + run_check $SUDO_HELPER rm $TEST_MNT/file_${i}
> +done
> +
> +sync
> +
> +run_check_umount_test_dev
> +
> +# Clear space cache and re-check fs
> +run_check $TOP/btrfs check --clear-space-cache v1 $TEST_DEV
> +run_check $TOP/btrfs check $TEST_DEV
> +
> +# Manually recheck space cache and super space cache generation
> +run_check_stdout $TOP/btrfs inspect-internal dump-tree -t root $TEST_DEV \
> + > $tmp
> +grep -q FREE_SPACE $tmp

I've noticed you use the temporary file pattern. Please don't, unless
the temporary file is really used for some purpose.  I've
fixed that in previous patches.

> +if [ $? -eq 0 ]; then
> + rm $tmp
> + _fail "clear space cache doesn't clear all space cache"
> +fi
> +
> +run_check_stdout $TOP/btrfs inspect-internal dump-super $TEST_DEV |\
> + grep cache_generation > $tmp
> +
> +grep -q 18446744073709551615 $tmp

Same here.

> +if [ $? -ne 0 ]; then
> + rm $tmp
> + _fail "clear space cache doesn't set cache_generation correctly"
> +fi
> +rm $tmp
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/3] btrfs-progs: send: remove unnecessary code

2016-10-19 Thread David Sterba
On Wed, Oct 19, 2016 at 12:45:57PM +0900, Tsutomu Itoh wrote:
> Some unnecessary codes are deleted.
> 
>  - the setting of subvol is double.
>  - read only check was already done by previous loop.
> 
> Signed-off-by: Tsutomu Itoh 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Monitoring Btrfs

2016-10-19 Thread Anand Jain



On 10/19/16 19:15, Austin S. Hemmelgarn wrote:

On 2016-10-18 17:36, Anand Jain wrote:




I would like to monitor my btrfs-filesystem for missing drives.




This is actually correct behavior, the filesystem reports that it
should
have 6 devices, which is how it knows a device is missing.




 Missing - means missing at the time of mount. So how are you planning
to monitor a disk which has failed while in production?



No, in `btrfs fi show` it means that it can't find the device.


 'btrfs fi show' is misleading compared to 'btrfs fi show -m'.
 -m tells the btrfs-kernel perspective of the devices; as of now
 there is no code in the kernel which changes the device status
 while it's mounted (except for readonly, which is irrelevant in
 raid1 with 1 disk failed).



Actually, that's exactly how I would expect each of them to behave.  We
need some way to get both the state the kernel thinks the FS is in, and
the state it's actually in (according to the tools, not the kernel), and
'-m' reporting kernel state while no '-m' reports actual state is
exactly what I would expect in this case.




That leads also to another way I hadn't thought of to monitor a
filesystem.  The output of 'fi show' with and without '-m' should match
if the filesystem was healthy when mounted and is still healthy, if they
don't, then something is wrong.




1. Filesystem flags.  These will change when the filesystem goes
degraded,


  Which flag is in question here?

I should clarify here, I mean the mount options, I'm just used to the
monit terminology (which was not well picked in this case).  The big one
to watch is the read-only flag, as BTRFS will force a filesystem
read-only (which updates the mount options).  Any change to the mount
options though without manual intervention is generally a sign that
_something_ is wrong.



 btrfs-progs shouldn't add its own intelligence in determining the
 device state, it should be a transparent tool to report status from
 the btrfs-kernel. So I am opposed to patches such as

commit 206efb60cbe3049e0d44c6da3c1909aeee18f813
btrfs-progs: Add missing devices check for mounted btrfs.

 There are many ways a device can fail/recover in a SAN environment;
 this device-state-managing intelligence should be in one place, and
 that place is the kernel. The volume manager part of the code in the
 kernel is incomplete.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: csum failed during copy/compare

2016-10-19 Thread Martin Dev
Fails on Antergos Linux 4.8.2-1-ARCH #1 SMP PREEMPT Mon Oct 17
08:11:46 CEST 2016 x86_64 GNU/Linux

btrfs-progs v4.8.1

On Mon, Oct 10, 2016 at 10:05 PM, Chris Murphy  wrote:
> On Mon, Oct 10, 2016 at 12:42 PM, Roman Mamedov  wrote:
>> On Mon, 10 Oct 2016 10:44:39 +0100
>> Martin Dev  wrote:
>>
>>> I work for system verification of SSDs and we've recently come up
>>> against an issue with BTRFS on Ubuntu 16.04
>>
>>> This seems to be a recent change
>>
>> ...well, a change in what?
>>
>> If you really didn't change anything on your machines and the used process,
>> there is no reason for anything to start breaking, other than obvious 
>> hardware
>> issues from age/etc (likely not what's happening here).
>>
>> So you most likely did change something yourself, and perhaps the change was
>> upgrading OS version, kernel version(!!!), or versions of software in 
>> general.
>>
>> As such, the first suggestion would be go through the recent software updates
>> history, maybe even restore an OS image you used three months ago (if
>> available) and confirm that the problem doesn't occur there. After that it's 
>> a
>> process called bisecting, there are tools for that, but likely you don't even
>> need those yet, just carefully note when you got which upgrades, paying
>> highest attention to the kernel version, and note at which point the
>> corruptions start to occur.
>
>
> There  have been various trim bugs, in Btrfs but also in the block
> layer. And I don't remember all the different versions involved.  I'd
> like to think 4.4.24 should behave the same as 4.8.1, so I would
> retest with those two, using something without ubuntu specific
> backports (i.e. something as close to the kernel.org trees of those
> versions as possible). I have no idea what Ubuntu generic 4.4.0-21
> translates into. Because of the 0, it makes me think it's literally
> 4.4.0 with 21 sets of various backports, from some unknown time frame
> without going and looking it up. If that's really 4.4.21, then it's
> weirdly named, I don't know why any distro would do that.
>
> In any case I would compare 4.8.1 and 4.4.24 because those two should
> work and if not it's a bug that needs to get fixed. Independently,
> check the SSD firmware. There have been bugs there also.
>
> --
> Chris Murphy
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] btrfs: Add WARN_ON for qgroup reserved underflow

2016-10-19 Thread David Sterba
On Fri, Sep 30, 2016 at 09:15:36AM +0800, Qu Wenruo wrote:
> While the reason why qgroup reserved space may underflow is still under
> investigation, such WARN_ON will help us to expose the bug more easily,
> and for end-user we can detect and avoid underflow.
> 
> Signed-off-by: Qu Wenruo 
> ---
>  fs/btrfs/qgroup.c | 21 -
>  1 file changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index 8db2e29..8532587 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -1061,8 +1061,12 @@ static int __qgroup_excl_accounting(struct 
> btrfs_fs_info *fs_info,
>   WARN_ON(sign < 0 && qgroup->excl < num_bytes);
>   qgroup->excl += sign * num_bytes;
>   qgroup->excl_cmpr += sign * num_bytes;
> - if (sign > 0)
> - qgroup->reserved -= num_bytes;
> + if (sign > 0) {
> + if (WARN_ON(qgroup->reserved < num_bytes))

That's only partially helpful, you should also print the numbers,
ref_root and/or qgroup id.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


WARNING at extent-tree.c:6945 __btrfs_free_extent.isra.33+0x7d6/0xca0 (Linux 4.8.0)

2016-10-19 Thread Dāvis Mosāns
Basically, on a multi-disk btrfs partition a few sectors on one HDD became
unreadable, and when trying to delete one folder on that filesystem I
got this in the log:

[12210.590339] WARNING: CPU: 4 PID: 3375 at
/mnt/linux/fs/btrfs/extent-tree.c:6945
__btrfs_free_extent.isra.33+0x7d6/0xca0 [btrfs]
[12210.590340] Modules linked in: fuse input_leds led_class
snd_usb_audio snd_usbmidi_lib snd_rawmidi snd_seq_device mousedev
joydev hid_generic usbhid hid xt_CHECKSUM ipt_MASQUERADE
nf_nat_masquerade_ipv4 tun cfg80211 rfkill nf_conntrack_netbios_ns
nf_conntrack_broadcast ip6t_REJECT nf_reject_ipv6 ip6t_rpfilter
xt_tcpudp ipt_REJECT nf_reject_ipv4 xt_conntrack ip_set nfnetlink
ebtable_broute ebtable_nat ip6table_nat nf_conntrack_ipv6
nf_defrag_ipv6 nf_nat_ipv6 ip6table_raw ip6table_security
ip6table_mangle iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4
nf_nat_ipv4 nf_nat nf_conntrack iptable_raw iptable_security
iptable_mangle ebtable_filter ebtables bridge ip6table_filter
ip6_tables stp iptable_filter llc it87 hwmon_vid kvm_amd kvm
saa7134_alsa irqbypass snd_hda_codec_realtek snd_hda_codec_generic
crct10dif_pclmul
[12210.590384]  snd_hda_codec_hdmi saa7134 crc32_pclmul tveeprom
rc_core ghash_clmulni_intel snd_hda_intel v4l2_common snd_hda_codec
aesni_intel videobuf2_dma_sg aes_x86_64 snd_hda_core videobuf2_memops
videobuf2_v4l2 lrw videobuf2_core gf128mul snd_hwdep videodev r8169
glue_helper evdev snd_pcm media ablk_helper cryptd mac_hid pcspkr
k10temp fam15h_power mxm_wmi snd_timer mii snd sp5100_tco soundcore
i2c_piix4 acpi_cpufreq tpm_infineon tpm_tis tpm_tis_core shpchp tpm
button wmi nfsd auth_rpcgss oid_registry nfs_acl sch_fq_codel lockd
grace sunrpc vboxnetflt(O) vboxnetadp(O) pci_stub vboxpci(O)
vboxdrv(O) nvidia_uvm(PO) btrfs xor zlib_deflate raid6_pq sd_mod
ata_generic pata_acpi ohci_pci serio_raw atkbd libps2 nvidia_drm(PO)
nvidia_modeset(PO) nvidia(PO) firewire_ohci xhci_pci firewire_core
ohci_hcd
[12210.590435]  ehci_pci xhci_hcd crc_itu_t ehci_hcd drm pata_atiixp
usbcore ahci libahci usb_common i2c_core i8042 serio mvsas libsas
scsi_transport_sas libata scsi_mod crc32c_generic crc32c_intel vfat
fat ip_tables x_tables
[12210.590453] CPU: 4 PID: 3375 Comm: kworker/u16:0 Tainted: P
  O4.8.0-ARCH-dirty #1
[12210.590454] Hardware name: Gigabyte Technology Co., Ltd.
GA-990FXA-UD3/GA-990FXA-UD3, BIOS FFe 11/08/2013
[12210.590471] Workqueue: btrfs-extent-refs btrfs_extent_refs_helper [btrfs]
[12210.590474]  0286 463ac1f2 8805bb097ae0
812e9bb0
[12210.590477]    8805bb097b20
8107bcab
[12210.590481]  1b219626bc00 009626bbf000 fffe
88061216ec80
[12210.590484] Call Trace:
[12210.590489]  [] dump_stack+0x63/0x83
[12210.590492]  [] __warn+0xcb/0xf0
[12210.590494]  [] warn_slowpath_null+0x1d/0x20
[12210.590508]  []
__btrfs_free_extent.isra.33+0x7d6/0xca0 [btrfs]
[12210.590522]  []
__btrfs_run_delayed_refs+0x5b0/0x12e0 [btrfs]
[12210.590525]  [] ? kmem_cache_free+0x1e0/0x210
[12210.590528]  [] ? cpuacct_charge+0x86/0xa0
[12210.590546]  [] ?
end_bio_extent_readpage+0x222/0x5f0 [btrfs]
[12210.590560]  [] btrfs_run_delayed_refs+0xa1/0x2a0 [btrfs]
[12210.590575]  [] delayed_ref_async_start+0x94/0xb0 [btrfs]
[12210.590592]  [] btrfs_scrubparity_helper+0x7d/0x340 [btrfs]
[12210.590608]  [] btrfs_extent_refs_helper+0xe/0x10 [btrfs]
[12210.590611]  [] process_one_work+0x1e5/0x470
[12210.590614]  [] worker_thread+0x48/0x4e0
[12210.590616]  [] ? process_one_work+0x470/0x470
[12210.590619]  [] kthread+0xd8/0xf0
[12210.590621]  [] ? __switch_to+0x300/0x720
[12210.590625]  [] ret_from_fork+0x1f/0x40
[12210.590628]  [] ? kthread_worker_fn+0x170/0x170
[12210.590630] ---[ end trace 293673cf94b7856a ]---
[12210.590633] BTRFS info (device sdo): leaf 33812980416512 total ptrs
213 free space 2101
[12210.590635] item 0 key (644872798208 168 524288) itemoff 16230
itemsize 53
[12210.590637] extent refs 1 gen 215 flags 1
[12210.590639] extent data backref root 261 objectid 783
offset 41418752 count 1
[12210.590641] item 1 key (644873322496 168 524288) itemoff 16177
itemsize 53
[12210.590642] extent refs 1 gen 215 flags 1
[12210.590643] extent data backref root 261 objectid 783
offset 41943040 count 1
[12210.590645] item 2 key (644873846784 168 524288) itemoff 16124
itemsize 53
[12210.590646] extent refs 1 gen 215 flags 1
[12210.590647] extent data backref root 261 objectid 783
offset 42467328 count 1
[12210.590649] item 3 key (644874371072 168 524288) itemoff 16071
itemsize 53
[12210.590650] extent refs 1 gen 215 flags 1
[12210.590651] extent data backref root 261 objectid 783
offset 42991616 count 1
[12210.590653] item 4 key (644874895360 168 524288) itemoff 16018
itemsize 53
[12210.590654] extent refs 1 gen 215 flags 1
[12210.590655] extent data backref root 261 objectid 783
offset 43515904 count 1
[12210.590657] item 5 key (644875419648 168 

Re: [PATCH v13 00/15] Btrfs In-band De-duplication

2016-10-19 Thread Qu Wenruo



At 10/17/2016 10:30 PM, David Sterba wrote:

On Thu, Sep 08, 2016 at 03:12:49PM +0800, Qu Wenruo wrote:

This patchset can be fetched from github:
https://github.com/adam900710/linux.git wang_dedupe_20160907


Can you please publish the patchset in a branch that does not change
name and is not based on for-next? I'm going to do fewer manual merge
tests with Chandan's patchset. A 'dedupe-latest' would work for me,
otherwise feel free to keep the timestamped branches for your needs. The
base branch should be chris' integration (not necessarily the latest
one). Thanks.



Not a problem.

Since now the prepare patches are all in mainline, I can rebase them to 
either mainline non-rc release or Chris' for-linux-4.9.


Thanks,
Qu


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html