[RFC PATCH] btrfs: Remove 'objectid' member from struct btrfs_root

2018-08-05 Thread Misono Tomohiro
There are two members in struct btrfs_root which indicate root's
objectid: ->objectid and ->root_key.objectid.

They are both set to the same value in __setup_root():
  static void __setup_root(struct btrfs_root *root,
   struct btrfs_fs_info *fs_info,
   u64 objectid)
  {
...
root->objectid = objectid;
...
root->root_key.objectid = objectid;
...
  }
and not changed to other value after initialization.

grep in btrfs directory shows both are used in many places:
  $ grep -rI "root->root_key.objectid" | wc -l
  133
  $ grep -rI "root->objectid" | wc -l
  55
 (4.17, inc. some noise)

It is confusing to have two similar variable names and it seems
that there is no rule about which should be used in a certain case.

Since ->root_key itself is needed for tree reloc tree, let's remove
'objectid' member and unify code to use ->root_key.objectid in all places.

Signed-off-by: Misono Tomohiro 
---
Although being fundamentally independent, this is based on the
patch: https://patchwork.kernel.org/patch/10556485/
since it also touches root->objectid.

 fs/btrfs/backref.c   |  5 +++--
 fs/btrfs/btrfs_inode.h   |  8 
 fs/btrfs/ctree.c |  2 +-
 fs/btrfs/ctree.h |  1 -
 fs/btrfs/delayed-inode.c |  5 +++--
 fs/btrfs/disk-io.c   |  5 ++---
 fs/btrfs/export.c|  4 ++--
 fs/btrfs/inode.c |  2 +-
 fs/btrfs/ioctl.c |  2 +-
 fs/btrfs/qgroup.c| 23 ---
 fs/btrfs/ref-verify.c|  8 
 fs/btrfs/relocation.c|  3 ++-
 fs/btrfs/send.c  | 16 
 fs/btrfs/super.c |  6 --
 fs/btrfs/transaction.c   |  4 ++--
 include/trace/events/btrfs.h | 15 ---
 16 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index ae750b1574a2..84006e3dd105 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1468,7 +1468,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, 
u64 bytenr)
struct seq_list elem = SEQ_LIST_INIT(elem);
int ret = 0;
struct share_check shared = {
-   .root_objectid = root->objectid,
+   .root_objectid = root->root_key.objectid,
.inum = inum,
.share_count = 0,
};
@@ -2031,7 +2031,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root 
*fs_root,
/* path must be released before calling iterate()! */
btrfs_debug(fs_root->fs_info,
"following ref at offset %u for inode %llu in 
tree %llu",
-   cur, found_key.objectid, fs_root->objectid);
+   cur, found_key.objectid,
+   fs_root->root_key.objectid);
ret = iterate(parent, name_len,
  (unsigned long)(iref + 1), eb, ctx);
if (ret)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 1343ac57b438..97d91e55b70a 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -206,7 +206,7 @@ static inline struct btrfs_inode *BTRFS_I(const struct 
inode *inode)
 static inline unsigned long btrfs_inode_hash(u64 objectid,
 const struct btrfs_root *root)
 {
-   u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
+   u64 h = objectid ^ (root->root_key.objectid * GOLDEN_RATIO_PRIME);
 
 #if BITS_PER_LONG == 32
h = (h >> 32) ^ (h & 0xffffffff);
@@ -339,15 +339,15 @@ static inline void btrfs_print_data_csum_error(struct 
btrfs_inode *inode,
struct btrfs_root *root = inode->root;
 
/* Output minus objectid, which is more meaningful */
-   if (root->objectid >= BTRFS_LAST_FREE_OBJECTID)
+   if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
btrfs_warn_rl(root->fs_info,
"csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 
0x%08x mirror %d",
-   root->objectid, btrfs_ino(inode),
+   root->root_key.objectid, btrfs_ino(inode),
logical_start, csum, csum_expected, mirror_num);
else
btrfs_warn_rl(root->fs_info,
"csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 
0x%08x mirror %d",
-   root->objectid, btrfs_ino(inode),
+   root->root_key.objectid, btrfs_ino(inode),
logical_start, csum, csum_expected, mirror_num);
 }
 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d436fb4c002e..1f71695cb0a8 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -207,7 +207,7 @@ static void add_root_to_dirty_list(struct btrfs_root *root)
spin_lock(&fs_info->trans_lock);
if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
 

[PATCH v3] btrfs: qgroup: Remove qgroup items along with subvolume deletion

2018-08-05 Thread Misono Tomohiro
When qgroup is on, subvolume deletion does not remove qgroup items
of the subvolume (qgroup info, limit, relation) from quota tree and
they need to get removed manually by "btrfs qgroup destroy".

Since level 0 qgroup cannot be used/inherited by any other subvolume,
let's remove them automatically when subvolume is deleted
(to be precise, when the subvolume root is dropped).

Reviewed-by: Lu Fengqi 
Reviewed-by: Qu Wenruo 
Signed-off-by: Misono Tomohiro 
---
v2 -> v3:
  Use root->root_key.objectid instead of root->objectid
  Add Reviewed-by tag

v1 -> v2:
  Move call of btrfs_remove_qgroup() from btrfs_delete_subvolume()
  to btrfs_snapshot_destroy() so that it will be called after the
  subvolume root is really dropped

 fs/btrfs/extent-tree.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e7b237b9547..48edf839ed2c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_root_item *root_item = &root->root_item;
struct walk_control *wc;
struct btrfs_key key;
+   u64 objectid = root->root_key.objectid;
int err = 0;
int ret;
int level;
bool root_dropped = false;
 
-   btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
+   btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
 
path = btrfs_alloc_path();
if (!path) {
@@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
goto out_end_trans;
}
 
-   if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+   if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
  NULL, NULL);
if (ret < 0) {
@@ -9043,8 +9044,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 *
 * The most common failure here is just -ENOENT.
 */
-   btrfs_del_orphan_item(trans, tree_root,
- root->root_key.objectid);
+   btrfs_del_orphan_item(trans, tree_root, objectid);
}
}
 
@@ -9056,6 +9056,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
btrfs_put_fs_root(root);
}
root_dropped = true;
+
+/* Remove level-0 qgroup items since no other subvolume can use them */
+   ret = btrfs_remove_qgroup(trans, objectid);
+   if (ret && ret != -EINVAL && ret != -ENOENT) {
+   btrfs_abort_transaction(trans, ret);
+   err = ret;
+   }
+
 out_end_trans:
btrfs_end_transaction_throttle(trans);
 out_free:
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] fs/btrfs/disk-io: Remove unneeded variable "err"

2018-08-05 Thread zhong jiang
On 2018/8/6 3:13, Nikolay Borisov wrote:
>
> On  5.08.2018 18:02, zhong jiang wrote:
>> The err is not used after initalization, So remove it and make
>> the function to be void function.
>>
>> Signed-off-by: zhong jiang 
> The ret value of this function is not checked by the sole caller
> (btrfs_cleanup_one_transaction), however, this does not give you the
> right to use a bulk commit message. If you want your future
> contributions to be taken seriously please put actual effort in properly
> describing your changes.
 I am sorry for that. I will merge the series into a patch. I will repost.

 Thanks,
 zhong jiang
>
>> ---
>>  fs/btrfs/disk-io.c | 12 
>>  1 file changed, 4 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index 468365d..34e4926 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -53,8 +53,8 @@
>>  static const struct extent_io_ops btree_extent_io_ops;
>>  static void end_workqueue_fn(struct btrfs_work *work);
>>  static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
>> -static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
>> -  struct btrfs_fs_info *fs_info);
>> +static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
>> +   struct btrfs_fs_info *fs_info);
>>  static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
>>  static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
>>  struct extent_io_tree *dirty_pages,
>> @@ -4179,13 +4179,12 @@ static void btrfs_destroy_all_ordered_extents(struct 
>> btrfs_fs_info *fs_info)
>>  spin_unlock(&fs_info->ordered_root_lock);
>>  }
>>  
>> -static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
>> -  struct btrfs_fs_info *fs_info)
>> +static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
>> +   struct btrfs_fs_info *fs_info)
>>  {
>>  struct rb_node *node;
>>  struct btrfs_delayed_ref_root *delayed_refs;
>>  struct btrfs_delayed_ref_node *ref;
>> -int ret = 0;
>>  
>>  delayed_refs = &trans->delayed_refs;
>>  
>> @@ -4193,7 +4192,6 @@ static int btrfs_destroy_delayed_refs(struct 
>> btrfs_transaction *trans,
>>  if (atomic_read(&delayed_refs->num_entries) == 0) {
>>  spin_unlock(&delayed_refs->lock);
>>  btrfs_info(fs_info, "delayed_refs has NO entry");
>> -return ret;
>>  }
>>  
>>  while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
>> @@ -4247,8 +4245,6 @@ static int btrfs_destroy_delayed_refs(struct 
>> btrfs_transaction *trans,
>>  }
>>  
>>  spin_unlock(&delayed_refs->lock);
>> -
>> -return ret;
>>  }
>>  
>>  static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
>>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] fs/btrfs/disk-io: Remove unneeded variable "err"

2018-08-05 Thread zhong jiang
On 2018/8/5 23:27, Joe Perches wrote:
> On Sun, 2018-08-05 at 23:02 +0800, zhong jiang wrote:
>> The err is not used after initalization, So remove it and make
>> the function to be void function.
> []
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> []
>> @@ -4193,7 +4192,6 @@ static int btrfs_destroy_delayed_refs(struct 
>> btrfs_transaction *trans,
>>  if (atomic_read(&delayed_refs->num_entries) == 0) {
>>  spin_unlock(&delayed_refs->lock);
>>  btrfs_info(fs_info, "delayed_refs has NO entry");
>> -return ret;
> Think a little more about this please.
> This is not a sensible removal.
 I am sorry for stupid mistake.  I  will repost.

Thanks,
zhong jiang
>>  }
>>  
>>  while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
>
> .
>


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/3] Remove uneeded variable "err"

2018-08-05 Thread zhong jiang
On 2018/8/6 3:14, Nikolay Borisov wrote:
>
> On  5.08.2018 18:02, zhong jiang wrote:
>> zhong jiang (3):
>>   fs/btrfs/disk-io: Remove unneeded variable "err"
>>   fs/btrfs/extent-tree: remove redudant variable "err"
>>   fs/btrfs/tree-log: remove the unneeded variable "err"
>>
>>  fs/btrfs/disk-io.c | 12 
>>  fs/btrfs/extent-tree.c |  6 ++
>>  fs/btrfs/tree-log.c|  5 +
>>  fs/btrfs/tree-log.h|  2 +-
>>  4 files changed, 8 insertions(+), 17 deletions(-)
>>
>
> NAK on the whole series on the basis of using a bulk commit message
> which doesn't really describe the changes of each individual commit. If
> you want those changes to eventually be merged resubmit the series with
> individual reasoning for every commit.
>
>
 I am sorry for using a bulk commit message. I will repost with a patch
 because it solves the same issue.

Thanks,
zhong jiang

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] btrfs-progs: ins: logical-resolve: Print message when path cannot be resolved

2018-08-05 Thread Misono Tomohiro
On 2018/08/04 0:06, David Sterba wrote:
> On Wed, Jul 25, 2018 at 05:20:17PM +0900, Misono Tomohiro wrote:
>> Since BTRFS_IOC_INO_PATHS requires fd of subvolume,
> 
> Does it? AFAICS btrfs_ioctl_ino_to_path gets root that's the containing
> subvolume of the path given by the user.

It is the reverse; it returns all paths of given inode number in given subvolume fd.

> 
>> logical-resolve
>> cannot find the path when mount point is not FS_TREE
>> (because the subvolume path cannot be opened).
> 
> Sorry, I don't understand what's the problem here. What you write sounds
> like there's a reproducer. If yes, please post it.

Sure. If mountpoint is FS_TREE, everything is ok:

$ mkfs.btrfs -fq $DEV
$ mount $DEV /mnt

// create snapshot and hardlink
$ btrfs sub create /mnt/sub
$ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000
$ btrfs sub snap /mnt/sub /mnt/snap
$ ln /mnt/sub/file /mnt/sub/file2
$ tree --inodes /mnt
/mnt
|-- [256]  snap
|   `-- [257]  file
`-- [256]  sub
|-- [257]  file
`-- [257]  file2

inode-resolve returns all paths of given inode number in given subvolume:
(it calls IOC_INO_PATHS)
$ btrfs ins inode-resolve 257 /mnt/sub
/mnt/sub/file
/mnt/sub/file2

$ btrfs ins inode-resolve 257 /mnt/snap
/mnt/snap/file

logical-resolve returns all paths containing given logical address in fs:
(it calls IOC_INO_PATHS after IOC_LOGICAL_INO)
$ filefrag -v /mnt/sub/file
Filesystem type is: 9123683e
File size of /mnt/sub/file is 1024000 (250 blocks of 4096 bytes)
 ext: logical_offset:physical_offset: length:   expected: flags:
   0:0.. 249:   3392..  3641:250: 
last,shared,eof
/mnt/sub/file: 1 extent found
$ btrfs ins logical-resolve $((3392*4096)) /mnt
/mnt/snap/file
/mnt/sub/file
/mnt/sub/file2


However, when mountpoint is not FS_TREE, above logical-resolve fails:

$ umount /mnt
$ mount -o subvol=sub $DEV /mnt
$ btrfs ins logical-resolve $((3392*4096)) /mnt
ERROR: cannot access '/mnt/snap': No such file or directory

The reasons are
(1) btrfs_list_path_for_root() returns paths from FS_TREE
(2) subvolume path cannot be opened and therefore cannot call IOC_INO_PATHS

(1) may be fixed for mounted subvolume, but if a subvolume is not accessible at 
all,
there is no way to call IOC_INO_PATHS.
To solve this, we need to pass arbitrary treeid just like INO_LOOKUP ioctl.

Thanks,
Misono

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: optimization to avoid ENOSPC for nocow writes after snapshot when low on data space

2018-08-05 Thread robbieko

Filipe Manana 於 2018-08-03 18:22 寫到:
On Fri, Aug 3, 2018 at 10:13 AM, robbieko  
wrote:

From: Robbie Ko 

Commit e9894fd3e3b3 ("Btrfs: fix snapshot vs nocow writting")
forced writeback fallback to COW when subvolume is snapshotted.


Commit e9894fd3e3b3 ("Btrfs: fix snapshot vs nocow writting") forced
nocow writes to fallback
to COW, during writeback, when a snapshot is created. This resulted in
writes made before creating
the snapshot to unexpectedly fail with ENOSPC during writeback when
success (0) was returned
to user space through the write system call.

The steps leading to this problem are:



1. When the space is full, write syscall will check if can
nocow, and space reservation will not happen.


1. When it's not possible to allocate data space for a write, the
buffered write path checks if
a NOCOW write is possible. If it is, it will not reserve space and
success (0) is returned to
user space.



2. Then snapshot happens before flushing IO (running dealloc),
we will increase will_be_snapshotted, and then when running
dealloc we fallback to COW and fail (ENOSPC).


2. Then when a snapshot is created, the root's will_be_snapshotted
atomic is incremented and writeback
is triggered for all inode's that belong to the root being
snapshotted. Incrementing that atomic forces
all previous writes to fallback to COW during writeback (running 
delalloc).


3. This results in the writeback for the inodes to fail and therefore
setting the ENOSPC error in their mappings,
so that a subsequent fsync on them will report the error to user
space. So it's not a completely silent data loss
(since fsync will report ENOSPC) but it's a very unexpected and
undesirable behaviour, because if a clean
shutdown/unmount of the filesystem happens without previous calls to
fsync, it is expected to have the data
present in the files after mounting the filesystem again.



So fix this by we add a snapshot_force_cow, this is used to
distinguish between write and writeback.


So fix this by adding a new atomic named snapshot_force_cow to the
root structure which prevents
this behaviour and works the following way:



1. Increase will_be_snapshotted, so that write force to the cow,
always need space reservation.


1. It is incremented when we start to create a snapshot after
triggering writeback and
before waiting for writeback to finish.



2. Flushing all dirty pages (running dealloc), then now writeback
is still flushed in nocow mode, make sure all ditry pages that might
not reserve space previously have flushed this time otherwise they
will fallback to cow mode and fail due to no space.


2. This new atomic is now what is used by writeback (running delalloc)
to decide whether we need to
fallback to COW or not. Because we incremented this new atomic after
triggering writeback in the snapshot
creation ioctl, we ensure that all buffered writes that happened
before snapshot creation will succeed and
not fallback to COW (which would make them fail with ENOSPC).



3. Increase snapshot_force_cow, since all new dirty pages are
guaranteed space reservation, when running dealloc we can safely
fallback to COW.


3. The existing atomic, will_be_snapshotted, is kept because it is
used to force new buffered writes, that
start after we started snapshotting, to reserve data space even when
NOCOW is possible.
This makes these writes fail early with ENOSPC when there's no
available space to allocate, preventing the
unexpected behaviour of writeback later failing with ENOSPC due to a
fallback to COW mode.



Fixes: e9894fd3e3b3 ("Btrfs: fix snapshot vs nocow writting")
Signed-off-by: Robbie Ko 
---
 fs/btrfs/ctree.h   |  1 +
 fs/btrfs/disk-io.c |  1 +
 fs/btrfs/inode.c   | 26 +-
 fs/btrfs/ioctl.c   | 14 ++
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 118346a..663ce05 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1277,6 +1277,7 @@ struct btrfs_root {
int send_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshotted;
+   atomic_t snapshot_force_cow;

/* For qgroup metadata reserved space */
spinlock_t qgroup_meta_rsv_lock;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 205092d..5573916 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1216,6 +1216,7 @@ static void __setup_root(struct btrfs_root 
*root, struct btrfs_fs_info *fs_info,

atomic_set(&root->log_batch, 0);
refcount_set(&root->refs, 1);
atomic_set(&root->will_be_snapshotted, 0);
+   atomic_set(&root->snapshot_force_cow, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index eba61bc..263b852 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1275,7 +1275,7 @@ static noinline int run_delalloc_nocow(struct 
inode *inode,

u64 disk_num_bytes;
u64 

[PATCH] Btrfs: fix unexpected failure of nocow buffered writes after snapshotting when low on space

2018-08-05 Thread robbieko
From: Robbie Ko 

Commit e9894fd3e3b3 ("Btrfs: fix snapshot vs nocow writting")
forced nocow writes to fallback to COW, during writeback,
when a snapshot is created. This resulted in writes made before
creating the snapshot to unexpectedly fail with ENOSPC during
writeback when success (0) was returned to user space through
the write system call.

The steps leading to this problem are:

1. When it's not possible to allocate data space for a write,
the buffered write path checks if a NOCOW write is possible.
If it is, it will not reserve space and success (0) is returned
to user space.

2. Then when a snapshot is created, the root's will_be_snapshotted
atomic is incremented and writeback is triggered for all inode's
that belong to the root being snapshotted. Incrementing that atomic
forces all previous writes to fallback to COW during writeback
(running delalloc).

3. This results in the writeback for the inodes to fail and therefore
setting the ENOSPC error in their mappings, so that a subsequent fsync
on them will report the error to user space. So it's not a completely
silent data loss (since fsync will report ENOSPC) but it's a very
unexpected and undesirable behaviour, because if a clean
shutdown/unmount of the filesystem happens without previous calls to
fsync, it is expected to have the data present in the files after
mounting the filesystem again.

So fix this by adding a new atomic named snapshot_force_cow to the
root structure which prevents this behaviour and works the following way:

1. It is incremented when we start to create a snapshot after
triggering writeback and before waiting for writeback to finish.

2. This new atomic is now what is used by writeback (running delalloc)
to decide whether we need to fallback to COW or not. Because we
incremented this new atomic after triggering writeback in the snapshot
creation ioctl, we ensure that all buffered writes that happened
before snapshot creation will succeed and not fallback to COW
(which would make them fail with ENOSPC).

3. The existing atomic, will_be_snapshotted, is kept because it is
used to force new buffered writes, that start after we started
snapshotting, to reserve data space even when NOCOW is possible.
This makes these writes fail early with ENOSPC when there's no
available space to allocate, preventing the unexpected behaviour
of writeback later failing with ENOSPC due to a fallback to COW mode.

Fixes: e9894fd3e3b3 ("Btrfs: fix snapshot vs nocow writting")
Signed-off-by: Robbie Ko 
Reviewed-by: Filipe Manana 
---
 fs/btrfs/ctree.h   |  1 +
 fs/btrfs/disk-io.c |  1 +
 fs/btrfs/inode.c   | 26 +-
 fs/btrfs/ioctl.c   | 16 
 4 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 118346a..663ce05 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1277,6 +1277,7 @@ struct btrfs_root {
int send_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshotted;
+   atomic_t snapshot_force_cow;
 
/* For qgroup metadata reserved space */
spinlock_t qgroup_meta_rsv_lock;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 205092d..5573916 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1216,6 +1216,7 @@ static void __setup_root(struct btrfs_root *root, struct 
btrfs_fs_info *fs_info,
atomic_set(&root->log_batch, 0);
refcount_set(&root->refs, 1);
atomic_set(&root->will_be_snapshotted, 0);
+   atomic_set(&root->snapshot_force_cow, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index eba61bc..263b852 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1275,7 +1275,7 @@ static noinline int run_delalloc_nocow(struct inode 
*inode,
u64 disk_num_bytes;
u64 ram_bytes;
int extent_type;
-   int ret, err;
+   int ret;
int type;
int nocow;
int check_prev = 1;
@@ -1407,11 +1407,9 @@ static noinline int run_delalloc_nocow(struct inode 
*inode,
 * if there are pending snapshots for this root,
 * we fall into common COW way.
 */
-   if (!nolock) {
-   err = btrfs_start_write_no_snapshotting(root);
-   if (!err)
-   goto out_check;
-   }
+   if (!nolock &&
+   
unlikely(atomic_read(&root->snapshot_force_cow)))
+   goto out_check;
/*
 * force cow if csum exists in the range.
 * this ensure that csum for a given extent are
@@ -1420,9 +1418,6 @@ static noinline int run_delalloc_nocow(struct inode 
*inode,
ret = 

[PATCH RESEND v8] Add cli and ioctl to forget scanned device(s)

2018-08-05 Thread Anand Jain
[applies on misc-next and for-next-20180801]

v8:
 Change log update in the kernel patch.

v7:
 Use struct btrfs_ioctl_vol_args (instead of struct
  btrfs_ioctl_vol_args_v2) as its inline with other ioctl
  btrfs-control
 The CLI usage/features remains same. However internally the ioctl flag
  is not required to delete all the unmounted devices. Instead leave
  btrfs_ioctl_vol_args::name NULL.

v6:
 Use the changed fn name btrfs_free_stale_devices().

 Change in title:
 Old v5:
 Cover-letter:
  [PATCH v5] Add cli and ioctl to ignore a scanned device
 Kernel:
  [PATCH v5] btrfs: introduce feature to ignore a btrfs device
 Progs:
  [PATCH v5] btrfs-progs: add 'btrfs device ignore' cli

v5:
  Adds feature to delete all stale devices
  Reuses btrfs_free_stale_devices() fn and so depends on the
patch-set [1] in the ML.
  Uses struct btrfs_ioctl_vol_args_v2 instead of
struct btrfs_ioctl_vol_args as arg
  Does the device path matching instead of btrfs_device matching
(we won't delete the mounted device as btrfs_free_stale_devices()
checks for it)
v4:
  No change. But as the ML thread may be confusing, so resend.
v3:
  No change. Send to correct ML.
v2:
  Accepts review from Nikolay, details are in the specific patch.
  Patch 1/2 is renamed from
[PATCH 1/2] btrfs: refactor btrfs_free_stale_device() to get device list 
delete
  to
[PATCH 1/2] btrfs: add function to device list delete

Adds cli and ioctl to forget a scanned device or forget all stale
devices in the kernel.

Anand Jain (1):
  btrfs: introduce feature to forget a btrfs device

 fs/btrfs/super.c   | 3 +++
 fs/btrfs/volumes.c | 9 +
 fs/btrfs/volumes.h | 1 +
 include/uapi/linux/btrfs.h | 2 ++
 4 files changed, 15 insertions(+)

Anand Jain (1):
  btrfs-progs: add cli to forget one or all scanned devices

 cmds-device.c | 58 ++
 ioctl.h   |  2 ++
 2 files changed, 60 insertions(+)

-- 
2.7.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs-progs: add cli to forget one or all scanned devices

2018-08-05 Thread Anand Jain
This patch adds cli
  btrfs device forget [dev]
to remove the given device structure in the kernel if the device
is unmounted. If no argument is given it shall remove all stale
(device which are not mounted) from the kernel.

Signed-off-by: Anand Jain 
---
 cmds-device.c | 58 ++
 ioctl.h   |  2 ++
 2 files changed, 60 insertions(+)

diff --git a/cmds-device.c b/cmds-device.c
index 86459d1b9564..49cfd4b41adb 100644
--- a/cmds-device.c
+++ b/cmds-device.c
@@ -326,6 +326,63 @@ out:
return !!ret;
 }
 
+static const char * const cmd_device_forget_usage[] = {
+   "btrfs device forget [<device>]",
+   "Forget a stale device or all stale devices in btrfs.ko",
+   NULL
+};
+
+static int btrfs_forget_devices(char *path)
+{
+   struct btrfs_ioctl_vol_args args;
+   int ret;
+   int fd;
+
+   fd = open("/dev/btrfs-control", O_RDWR);
+   if (fd < 0)
+   return -errno;
+
+   memset(&args, 0, sizeof(args));
+   if (path)
+   strncpy_null(args.name, path);
+   ret = ioctl(fd, BTRFS_IOC_FORGET_DEV, &args);
+   if (ret)
+   ret = -errno;
+   close(fd);
+   return ret;
+}
+
+static int cmd_device_forget(int argc, char **argv)
+{
+   char *path;
+   int ret = 0;
+
+   if (check_argc_max(argc - optind, 1))
+   usage(cmd_device_forget_usage);
+
+   if (argc == 1) {
+   ret = btrfs_forget_devices(NULL);
+   if (ret)
+   error("Can't forget: %s", strerror(-ret));
+   return ret;
+   }
+
+   path = canonicalize_path(argv[1]);
+   if (!path) {
+   error("Could not canonicalize path '%s': %s",
+   argv[1], strerror(errno));
+   return -ENOENT;
+   }
+
+   ret  = btrfs_forget_devices(path);
+   if (ret)
+   error("Can't forget '%s': %s", path, strerror(-ret));
+
+   free(path);
+
+   return ret;
+}
+
 static const char * const cmd_device_ready_usage[] = {
"btrfs device ready <device>",
"Check device to see if it has all of its devices in cache for 
mounting",
@@ -601,6 +658,7 @@ const struct cmd_group device_cmd_group = {
CMD_ALIAS },
{ "remove", cmd_device_remove, cmd_device_remove_usage, NULL, 0 
},
{ "scan", cmd_device_scan, cmd_device_scan_usage, NULL, 0 },
+   { "forget", cmd_device_forget, cmd_device_forget_usage, NULL, 0 
},
{ "ready", cmd_device_ready, cmd_device_ready_usage, NULL, 0 },
{ "stats", cmd_device_stats, cmd_device_stats_usage, NULL, 0 },
{ "usage", cmd_device_usage,
diff --git a/ioctl.h b/ioctl.h
index 709e996f401c..e27d80e09392 100644
--- a/ioctl.h
+++ b/ioctl.h
@@ -721,6 +721,8 @@ static inline char *btrfs_err_str(enum btrfs_err_code 
err_code)
   struct btrfs_ioctl_vol_args)
 #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_FORGET_DEV _IOW(BTRFS_IOCTL_MAGIC, 5, \
+  struct btrfs_ioctl_vol_args)
 /* trans start and trans end are dangerous, and only for
  * use by applications that know how to avoid the
  * resulting deadlocks
-- 
2.7.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: introduce feature to forget a btrfs device

2018-08-05 Thread Anand Jain
Support for a new command 'btrfs dev forget [dev]' is proposed here
to undo the effects of 'btrfs dev scan [dev]'. For this purpose
this patch proposes to use ioctl #5 as it was empty.
IOW(BTRFS_IOCTL_MAGIC, 5, ..)
This patch adds new ioctl BTRFS_IOC_FORGET_DEV which can be sent from
the /dev/btrfs-control to forget one or all devices, (devices which are
not mounted) from the btrfs kernel.

The argument it takes is struct btrfs_ioctl_vol_args, and ::name can be
set to specify the device path. And all unmounted devices can be removed
from the kernel if no device path is provided.

Again, the devices are removed only if the relevant fsid aren't mounted.

This new cli can provide..
 . Release of unwanted btrfs_fs_devices and btrfs_devices memory if the
   device is not going to be mounted.
 . Ability to mount the device in degraded mode when one of the other
   device is corrupted like in split brain raid1.
 . Running test cases which requires btrfs.ko-reload if the rootfs
   is btrfs.

Signed-off-by: Anand Jain 
---
 fs/btrfs/super.c   | 3 +++
 fs/btrfs/volumes.c | 9 +
 fs/btrfs/volumes.h | 1 +
 include/uapi/linux/btrfs.h | 2 ++
 4 files changed, 15 insertions(+)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 67de3c0fc85b..470a32af474e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2244,6 +2244,9 @@ static long btrfs_control_ioctl(struct file *file, 
unsigned int cmd,
ret = PTR_ERR_OR_ZERO(device);
mutex_unlock(&uuid_mutex);
break;
+   case BTRFS_IOC_FORGET_DEV:
+   ret = btrfs_forget_devices(vol->name);
+   break;
case BTRFS_IOC_DEVICES_READY:
mutex_lock(&uuid_mutex);
device = btrfs_scan_one_device(vol->name, FMODE_READ,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8844904f9009..cd54a926141a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1208,6 +1208,15 @@ static int btrfs_read_disk_super(struct block_device 
*bdev, u64 bytenr,
return 0;
 }
 
+int btrfs_forget_devices(const char *path)
+{
+   mutex_lock(&uuid_mutex);
+   btrfs_free_stale_devices(strlen(path) ? path:NULL, NULL);
+   mutex_unlock(&uuid_mutex);
+
+   return 0;
+}
+
 /*
  * Look for a btrfs signature on a device. This may be called out of the mount 
path
  * and we are not allowed to call set_blocksize during the scan. The superblock
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 049619176831..1602b5faa7e7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -405,6 +405,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
   fmode_t flags, void *holder);
 struct btrfs_device *btrfs_scan_one_device(const char *path,
   fmode_t flags, void *holder);
+int btrfs_forget_devices(const char *path);
 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
 void btrfs_assign_next_active_device(struct btrfs_device *device,
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 5ca1d21fc4a7..b1be7f828cb4 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -836,6 +836,8 @@ enum btrfs_err_code {
   struct btrfs_ioctl_vol_args)
 #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_FORGET_DEV _IOW(BTRFS_IOCTL_MAGIC, 5, \
+  struct btrfs_ioctl_vol_args)
 /* trans start and trans end are dangerous, and only for
  * use by applications that know how to avoid the
  * resulting deadlocks
-- 
2.7.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] btrfs: qgroup: Remove qgroup item along with subvolume deletion

2018-08-05 Thread Misono Tomohiro
On 2018/08/03 18:16, Lu Fengqi wrote:
> On Fri, Aug 03, 2018 at 11:39:28AM +0300, Nikolay Borisov wrote:
>>
>>
>> On  3.08.2018 11:37, Misono Tomohiro wrote:
>>> On 2018/08/03 16:15, Lu Fengqi wrote:
 On Fri, Aug 03, 2018 at 03:21:12PM +0900, Misono Tomohiro wrote:
> When qgroup is on, subvolume deletion does not remove qgroup item
> of the subvolume (qgroup info, limits, relation) from quota tree and
> they needs to get removed manually by "btrfs qgroup destroy".
>
> Since level 0 qgroup cannot be used/inherited by any other subvolume,
> let's remove them automatically when subvolume is deleted
> (to be precise, when the subvolume root is dropped).
>
> Signed-off-by: Misono Tomohiro 

 Looks good to me.

 Reviewed-by: Lu Fengqi 
>>>
>>> Thanks for the review.
>>>

 There is an off-topic question below.

> ---
> Note that btrfs/057 fails, but it is the problem of testcase.
> I will update it too.
>
> v1 -> v2:
>  Move call of btrfs_remove_qgroup() from btrfs_delete_subvolume()
>  to btrfs_snapshot_destroy() so that it will be called after the
>  subvolume root is really dropped
>
> fs/btrfs/extent-tree.c | 16 
> 1 file changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 9e7b237b9547..b56dea8c8b9f 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>   struct btrfs_root_item *root_item = >root_item;
>   struct walk_control *wc;
>   struct btrfs_key key;
> + u64 objectid = root->objectid;
>   int err = 0;
>   int ret;
>   int level;
>   bool root_dropped = false;
>
> - btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
> + btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
>
>   path = btrfs_alloc_path();
>   if (!path) {
> @@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>   goto out_end_trans;
>   }
>
> - if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
> + if (objectid != BTRFS_TREE_RELOC_OBJECTID) {

 Here use root->objectid instead of root->root_key.objectid. If I recall
 correctly, the root->objectid and root->root_key.objectid are set to the
 identical value. I just wonder if there is any difference between the two
 "objectid"s after the btrfs_root was created?
>>>
>>> in __setup_root(root, fs_info, objectid):
>>> 
>>>   root->objectid = objectid;
>>> 
>>>   root->root_key.objectid = objectid;
>>> 
>>>
>>> and I don't see any update of objectid from "grep -r "root_key.objectid ="",
>>> I think it the same too (and fstests is ok), but any comment from
>>> those who more familiar with code is helpful.
>>
>> Perhaps root->objectid should be removed altogether, if it's a duplicate
>> of root->root_key.objectid
> 
> That's great! I hate these useless redundancies because they always make me
> confused. So Misono could you update this patch to use
> root->root_key.objectid?

Ok. Also I'll try to see if it is possible to remove root->objectid.
Misono

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BUG: scheduling while atomic

2018-08-05 Thread Qu Wenruo


On 2018年08月05日 22:09, James Courtier-Dutton wrote:
> I am seeing a server halt and require a manual restart that I think
> might be related to btrfs.
> I attach the kernel log from it, in the hope that someone will
> understand it better than me.
> Any clues?
> 
> https://paste.fedoraproject.org/paste/xSblK1RKANiwhKHQj31Cdw

When checking the dmesg, we should look at the first backtrace rather
than the later backtraces, which are normally caused by the first problem.

In your case, a general protection fault happens at timerqueue_add(), which
is unrelated to btrfs at all.

Thanks,
Qu
> 
> 
> Kind Regards
> 
> James
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 



signature.asc
Description: OpenPGP digital signature


Hanging btrfs-transaction

2018-08-05 Thread Zoltan
Dear Btrfs Experts,

I would like to ask for advice regarding the following problem:

I regularly do incremental backups of snapshots using commands like:

btrfs send -p /volumes/root/snapshots/2018-08-02
/volumes/root/snapshots/2018-08-03 | btrfs receive
/volumes/sj-backup/root

Recently the command started hanging with the following message
showing up in dmesg:

[136055.426170] INFO: task btrfs-transacti:13368 blocked for more than
120 seconds.
[136055.426181]   Tainted: P   OE4.15.0-29-generic #31-Ubuntu
[136055.426186] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[136055.426192] btrfs-transacti D0 13368  2 0x8000
[136055.426199] Call Trace:
[136055.426215]  __schedule+0x291/0x8a0
[136055.426224]  schedule+0x2c/0x80
[136055.426283]  btrfs_commit_transaction+0x81d/0x8f0 [btrfs]
[136055.426291]  ? wait_woken+0x80/0x80
[136055.426342]  transaction_kthread+0x18d/0x1b0 [btrfs]
[136055.426351]  kthread+0x121/0x140
[136055.426395]  ? btrfs_cleanup_transaction+0x560/0x560 [btrfs]
[136055.426402]  ? kthread_create_worker_on_cpu+0x70/0x70
[136055.426409]  ret_from_fork+0x35/0x40

This message is repeated 6 times. The first time this happened, I
waited a few hours for the command to finish (usually it only takes a
few minutes), but finally I gave up waiting and rebooted. When I ran
the command again, I got the same result.

I have another backup disk stored in a different location and from
time to time I swap them. I swapped them just to test whether the
problem is on the sending or the receiving side and with the other
disk everything works fine. I swapped them back and the problem
returned.

I have tried scrubbing the disk, it did not find any errors.
Interestingly, when I removed a few older snapshots from the target
disk, the command started working again, but a few days later, it
started hanging again. While it was working, I removed some more older
snapshots, which may or may not have been the trigger behind the
problem resurfacing.

I'm using Ubuntu 18.04, kernel 4.15.0-29-generic.

Thanks,

Zoltan
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


BUG: scheduling while atomic

2018-08-05 Thread James Courtier-Dutton
I am seeing a server halt and require a manual restart that I think
might be related to btrfs.
I attach the kernel log from it, in the hope that someone will
understand it better than me.
Any clues?

https://paste.fedoraproject.org/paste/xSblK1RKANiwhKHQj31Cdw


Kind Regards

James
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH 4/4] btrfs: undelete: Add the btrfs_ioctl_undelete

2018-08-05 Thread Lu Fengqi
The function will traverse the root from the fs_info->dead_roots and try
to call btrfs_undelete_subvolume() to recover them.

Note: It will lock fs_info->cleaner_mutex to keep the cleaner kthread
from deleting the subvolume which we want to recover.

Signed-off-by: Lu Fengqi 
---
 fs/btrfs/ioctl.c   | 83 ++
 include/uapi/linux/btrfs.h |  9 +
 2 files changed, 92 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7a11c4f8e450..83b9839799d0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1980,6 +1980,87 @@ static int btrfs_undelete_subvolume(const struct path 
*parent,
return ret;
 }
 
+static int btrfs_ioctl_undelete(struct file *file, void __user *argp)
+{
+   struct btrfs_ioctl_undelete_args __user *uarg;
+   struct btrfs_ioctl_undelete_args *args;
+   struct inode *inode = file_inode(file);
+   struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+   struct btrfs_root *root, *tmp;
+   char *name;
+   u64 count = 0;
+   u64 objectid;
+   int err = 0, ret;
+
+   /* copy search header and buffer size */
+   uarg = (struct btrfs_ioctl_undelete_args __user *)argp;
+   args = memdup_user(uarg, sizeof(*args));
+   if (IS_ERR(args))
+   return PTR_ERR(args);
+   args->name[BTRFS_PATH_NAME_MAX] = '\0';
+
+   name = kzalloc(BTRFS_PATH_NAME_MAX + 1, GFP_KERNEL);
+   if (IS_ERR(name)) {
+   err = PTR_ERR(name);
+   goto free_args;
+   }
+
+   if (!capable(CAP_SYS_ADMIN)) {
+   err = -EPERM;
+   goto free;
+   }
+
+   err = mnt_want_write_file(file);
+   if (err)
+   goto free;
+
+   /* Lock cleaner_mutex to prevent the cleaner kthread from deleting the
+* subvolume we want to recover so that we can perform the next rescue
+* in a relaxed manner.
+*/
+   mutex_lock(&fs_info->cleaner_mutex);
+
+   list_for_each_entry_safe(root, tmp, &fs_info->dead_roots, root_list) {
+   objectid = root->root_key.objectid;
+   snprintf(name, BTRFS_PATH_NAME_MAX, "%s%llu", args->name,
+   objectid);
+   ret = btrfs_undelete_subvolume(&file->f_path, root, name,
+  strlen(name));
+   if (ret)
+   continue;
+
+   /*
+* Feel free to remove this root from dead_root list since we
+* have recovered it successfully.
+*/
+   spin_lock(&fs_info->trans_lock);
+   list_del_init(&root->root_list);
+   spin_unlock(&fs_info->trans_lock);
+
+   if ((count + 1) * sizeof(objectid) > args->buf_size)
+   continue;
+
+   /* copy the subvolume id to user space */
+   ret = copy_to_user(&uarg->buf[count], &objectid,
+  sizeof(objectid));
+   if (ret)
+   err = -EFAULT;
+   count++;
+   }
+
+   mutex_unlock(&fs_info->cleaner_mutex);
+   mnt_drop_write_file(file);
+
+   /* copy the count to user space */
+   if (copy_to_user(&uarg->count, &count, sizeof(count)))
+   err = -EFAULT;
+free:
+   kfree(name);
+free_args:
+   kfree(args);
+   return err;
+}
+
 static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
void __user *arg)
 {
@@ -6089,6 +6170,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_subvol_rootref(file, argp);
case BTRFS_IOC_INO_LOOKUP_USER:
return btrfs_ioctl_ino_lookup_user(file, argp);
+   case BTRFS_IOC_SUBVOL_UNDELETE:
+   return btrfs_ioctl_undelete(file, argp);
}
 
return -ENOTTY;
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 5ca1d21fc4a7..25d030687b27 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -816,6 +816,13 @@ struct btrfs_ioctl_get_subvol_rootref_args {
__u8 align[7];
 };
 
+struct btrfs_ioctl_undelete_args {
+   char name[BTRFS_PATH_NAME_MAX + 1]; /* in - subvolume name prefix */
+   __u64 buf_size; /* in - size of buffer */
+   __u64 count;/* out - store number of recovered subvolumes */
+   __u64 buf[0];   /* out - store ids of recovered subvolumes */
+};
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -940,5 +947,7 @@ enum btrfs_err_code {
struct btrfs_ioctl_get_subvol_rootref_args)
 #define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
struct btrfs_ioctl_ino_lookup_user_args)
+#define BTRFS_IOC_SUBVOL_UNDELETE _IOWR(BTRFS_IOCTL_MAGIC, 63, \
+  struct btrfs_ioctl_undelete_args)

[RFC PATCH 0/4] undelete subvolume online version

2018-08-05 Thread Lu Fengqi
This patchset will add the BTRFS_IOC_SUBVOL_UNDELETE ioctl for online
btrfs subvolume undelete.

And btrfs subvolume undelete subcommand was added to btrfs-progs.

So user can use the following command to recover all the subvolumes that
are left on the device. Each recovered subvolume will be linked into the
<dest> directory and named <name><subvol_id>.

# btrfs subvolume undelete [-p <name>] <dest>

btrfs online undelete version:
https://github.com/littleroad/linux.git undelete

btrfs-progs online undelete version:
https://github.com/littleroad/btrfs-progs.git online_undelete

Issue: #82

Lu Fengqi (4):
  btrfs: factor out btrfs_link_subvol from create_subvol
  btrfs: don't BUG_ON() in btrfs_link_subvol()
  btrfs: undelete: introduce btrfs_undelete_subvolume
  btrfs: undelete: Add the btrfs_ioctl_undelete

 fs/btrfs/ioctl.c   | 270 +
 include/uapi/linux/btrfs.h |   9 ++
 2 files changed, 255 insertions(+), 24 deletions(-)

-- 
2.18.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH 1/4] btrfs: factor out btrfs_link_subvol from create_subvol

2018-08-05 Thread Lu Fengqi
The function btrfs_link_subvol is responsible for linking the subvolume to
the specified directory, which is the opposite of what
btrfs_unlink_subvol does.

No functional change.

Signed-off-by: Lu Fengqi 
---
 fs/btrfs/ioctl.c | 65 ++--
 1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d3a5d2a41e5f..d37c26f69112 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -542,6 +542,45 @@ int btrfs_is_empty_uuid(u8 *uuid)
return 1;
 }
 
+static int btrfs_link_subvol(struct btrfs_trans_handle *trans,
+struct inode *dir, u64 objectid, const char *name,
+int namelen)
+{
+   struct btrfs_root *root = BTRFS_I(dir)->root;
+   struct btrfs_key key;
+   u64 index = 0;
+   int ret;
+
+   /*
+* insert the directory item
+*/
+   ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   return ret;
+   }
+
+   key.objectid = objectid;
+   key.type = BTRFS_ROOT_ITEM_KEY;
+   key.offset = -1;
+   ret = btrfs_insert_dir_item(trans, root, name, namelen, BTRFS_I(dir),
+   &key, BTRFS_FT_DIR, index);
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   return ret;
+   }
+
+   btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
+   ret = btrfs_update_inode(trans, root, dir);
+   BUG_ON(ret);
+
+   ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
+btrfs_ino(BTRFS_I(dir)), index, name, namelen);
+   BUG_ON(ret);
+
+   return ret;
+}
+
 static noinline int create_subvol(struct inode *dir,
  struct dentry *dentry,
  const char *name, int namelen,
@@ -563,7 +602,6 @@ static noinline int create_subvol(struct inode *dir,
int err;
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
-   u64 index = 0;
uuid_le new_uuid;
 
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
@@ -677,30 +715,9 @@ static noinline int create_subvol(struct inode *dir,
new_root->highest_objectid = new_dirid;
mutex_unlock(&new_root->objectid_mutex);
 
-   /*
-* insert the directory item
-*/
-   ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
-   if (ret) {
-   btrfs_abort_transaction(trans, ret);
-   goto fail;
-   }
-
-   ret = btrfs_insert_dir_item(trans, root,
-   name, namelen, BTRFS_I(dir), &key,
-   BTRFS_FT_DIR, index);
-   if (ret) {
-   btrfs_abort_transaction(trans, ret);
+   ret = btrfs_link_subvol(trans, dir, objectid, name, namelen);
+   if (ret)
goto fail;
-   }
-
-   btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
-   ret = btrfs_update_inode(trans, root, dir);
-   BUG_ON(ret);
-
-   ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
-btrfs_ino(BTRFS_I(dir)), index, name, namelen);
-   BUG_ON(ret);
 
ret = btrfs_uuid_tree_add(trans, root_item->uuid,
  BTRFS_UUID_KEY_SUBVOL, objectid);
-- 
2.18.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH 3/4] btrfs: undelete: introduce btrfs_undelete_subvolume

2018-08-05 Thread Lu Fengqi
The function will do the following things which are almost the opposite
of what btrfs_delete_subvolume() does:

1. link the subvolume to the parent specified;
2. clear root flag and set root_refs to 1;
3. add the subvol to the uuid_tree;
4. delete the orphan_item.

Signed-off-by: Lu Fengqi 
---
 fs/btrfs/ioctl.c | 116 +++
 1 file changed, 116 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e0b5a8fb15e7..7a11c4f8e450 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1864,6 +1864,122 @@ static noinline int btrfs_ioctl_snap_create_v2(struct 
file *file,
return ret;
 }
 
+static int btrfs_undelete_subvolume(const struct path *parent,
+   struct btrfs_root *root,
+   const char *name, int namelen)
+{
+   struct inode *dir = d_inode(parent->dentry);
+   struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+   struct btrfs_root_item *root_item = &root->root_item;
+   struct btrfs_trans_handle *trans;
+   struct btrfs_block_rsv block_rsv;
+   struct dentry *dentry;
+   struct inode *inode;
+   u64 root_flags;
+   int ret;
+
+   btrfs_debug(fs_info, "Undelete subvolume %llu",
+   root->root_key.objectid);
+
+   /* only care about the intact subvolume */
+   if (btrfs_disk_key_objectid(&root_item->drop_progress) != 0)
+   return 0;
+
+   /* root_refs of destination parent root must not be 0 */
+   if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
+   return -ENOENT;
+
+   ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
+   if (ret == -EINTR)
+   return ret;
+
+   dentry = lookup_one_len(name, parent->dentry, namelen);
+   if (IS_ERR(dentry)) {
+   ret = PTR_ERR(dentry);
+   goto out_unlock;
+   }
+
+   down_write(&fs_info->subvol_sem);
+
+   ret = btrfs_may_create(dir, dentry);
+   if (ret)
+   goto out_up_write;
+
+   ret = btrfs_check_dir_item_collision(root, dir->i_ino, name, namelen);
+   if (ret)
+   goto out_up_write;
+
+   btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
+   /*
+* 1 - parent dir inode
+* 2 - dir entries
+* 2 - root ref/backref
+* 1 - UUID item
+*/
+   ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 6, false);
+   if (ret)
+   goto out_up_write;
+
+   trans = btrfs_start_transaction(BTRFS_I(dir)->root, 0);
+   if (IS_ERR(trans)) {
+   ret = PTR_ERR(trans);
+   btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+   goto out_up_write;
+   }
+
+   trans->block_rsv = &block_rsv;
+   trans->bytes_reserved = block_rsv.size;
+
+   ret = btrfs_link_subvol(trans, dir, root->root_key.objectid, name,
+   namelen);
+   if (ret)
+   goto fail;
+
+   /* clear BTRFS_ROOT_SUBVOL_DEAD root flag and set root_refs to 1*/
+   root_flags = btrfs_root_flags(root_item);
+   btrfs_set_root_flags(root_item,
+root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
+   btrfs_set_root_refs(root_item, 1);
+   ret = btrfs_update_root(trans, fs_info->tree_root,
+   &root->root_key, &root->root_item);
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   goto fail;
+   }
+
+   ret = btrfs_uuid_tree_add(trans, root_item->uuid, BTRFS_UUID_KEY_SUBVOL,
+ root->root_key.objectid);
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   goto fail;
+   }
+
+   ret = btrfs_del_orphan_item(trans, fs_info->tree_root,
+   root->root_key.objectid);
+   if (ret && ret != -ENOENT) {
+   btrfs_abort_transaction(trans, ret);
+   goto fail;
+   }
+fail:
+   trans->block_rsv = NULL;
+   trans->bytes_reserved = 0;
+   btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+   ret = btrfs_commit_transaction(trans);
+   if (!ret) {
+   inode = btrfs_lookup_dentry(dir, dentry);
+   if (IS_ERR(inode))
+   return PTR_ERR(inode);
+   d_instantiate(dentry, inode);
+   fsnotify_mkdir(dir, dentry);
+   }
+out_up_write:
+   up_write(&fs_info->subvol_sem);
+   dput(dentry);
+out_unlock:
+   inode_unlock(dir);
+   return ret;
+}
+
 static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
void __user *arg)
 {
-- 
2.18.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH 2/4] btrfs: don't BUG_ON() in btrfs_link_subvol()

2018-08-05 Thread Lu Fengqi
Both of btrfs_update_inode() and btrfs_add_root_ref() may fail because
of ENOMEM. So there's no reason to panic here, we can replace BUG_ON()
with btrfs_abort_transaction() here.

Signed-off-by: Lu Fengqi 
---
 fs/btrfs/ioctl.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d37c26f69112..e0b5a8fb15e7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -572,11 +572,17 @@ static int btrfs_link_subvol(struct btrfs_trans_handle 
*trans,
 
btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
ret = btrfs_update_inode(trans, root, dir);
-   BUG_ON(ret);
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   return ret;
+   }
 
ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
-   BUG_ON(ret);
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   return ret;
+   }
 
return ret;
 }
-- 
2.18.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html