[PATCH 6/8] btrfs-progs: sub show: Allow non-privileged user to call "subvolume show"

2018-11-26 Thread Misono Tomohiro
Allow non-privileged user to call subvolume show if new ioctls
(BTRFS_IOC_GET_SUBVOL_INFO/BTRFS_IOC_GET_SUBVOL_ROOTREF,
BTRFS_IOC_INO_LOOKUP_USER, from kernel 4.18) are available.
Non-privileged user still cannot use -r or -u option.

The behavior for root user is the same as before.

There are some output differences between root and user:
  root ... subvolume path is from top-level subvolume
   list all snapshots in the fs (inc. non-accessible ones)
  user ... subvolume path is absolute path
   list snapshots under the mountpoint
   (only to which the user has appropriate access right)

[Example]
 $ sudo mkfs.btrfs -f $DEV
 $ sudo mount $DEV /mnt

 $ sudo btrfs subvolume create /mnt/AAA
 $ sudo btrfs subvolume snapshot /mnt/AAA /mnt/snap1
 $ sudo btrfs subvolume snapshot /mnt/AAA /mnt/AAA/snap2

 $ sudo umount /mnt
 $ sudo mount -o subvol=AAA $DEV /mnt

 # root
 $ sudo btrfs subvolume show /mnt
 AAA
  Name:AAA
  UUID:15e80697-2ffb-0b4b-8e1e-e0873a7cf944
  ...
  Snapshot(s):
   AAA/snap2
   snap1

 # non-privileged user
 $ btrfs subvolume show /mnt
 /mnt
  Name:AAA
  UUID:15e80697-2ffb-0b4b-8e1e-e0873a7cf944
  ...
  Snapshot(s):
   /mnt/snap2

Signed-off-by: Misono Tomohiro 
Signed-off-by: David Sterba 
---
 Documentation/btrfs-subvolume.asciidoc |  11 ++-
 cmds-subvolume.c   | 107 ++---
 2 files changed, 105 insertions(+), 13 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index 428a2faa..ea8e9554 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -182,12 +182,19 @@ The id can be obtained from *btrfs subvolume list*, 
*btrfs subvolume show* or
 *show* [options] |::
 Show information of a given subvolume in the .
 +
+This command used to require root privileges. From kernel 4.18,
+a non-privileged user can call this as long as the -r/-u option is not used.
+Note that for root, the output path is relative to the top-level subvolume
+while an absolute path is shown for a non-privileged user.
+Also for root, the Snapshot(s) field lists all the snapshots in the fs while
+only snapshots under the mount point are shown for a non-privileged user.
++
 `Options`
 +
 -r|--rootid
-rootid of the subvolume.
+rootid of the subvolume (require root privileges).
 -u|--uuid:::
-UUID of the subvolume.
+UUID of the subvolume (require root privileges).
 
 +
 If no option is specified, subvolume information of  is shown,
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index cd2e4425..ab1f14a2 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -1886,8 +1886,12 @@ static int cmd_subvol_find_new(int argc, char **argv)
 static const char * const cmd_subvol_show_usage[] = {
"btrfs subvolume show [options] |",
"Show more information about the subvolume",
-   "-r|--rootid   rootid of the subvolume",
-   "-u|--uuid uuid of the subvolume",
+   "",
+   "This command had required root privileges. From kernel 4.18,",
+   "non-privileged user can call this unless -r/-u option is not used.",
+   "",
+   "-r|--rootid   rootid of the subvolume (require root privileges)",
+   "-u|--uuid uuid of the subvolume   (require root privileges)",
"",
"If no option is specified,  will be shown, otherwise",
"the rootid or uuid are resolved relative to the  path.",
@@ -1900,8 +1904,10 @@ static int cmd_subvol_show(int argc, char **argv)
char uuidparse[BTRFS_UUID_UNPARSED_SIZE];
char *fullpath = NULL;
int fd = -1;
+   int fd_mnt = -1;
int ret = 1;
DIR *dirstream1 = NULL;
+   DIR *dirstream_mnt = NULL;
int by_rootid = 0;
int by_uuid = 0;
u64 rootid_arg = 0;
@@ -1909,7 +1915,10 @@ static int cmd_subvol_show(int argc, char **argv)
struct btrfs_util_subvolume_iterator *iter;
struct btrfs_util_subvolume_info subvol;
char *subvol_path = NULL;
+   char *subvol_name = NULL;
+   char *mount_point = NULL;
enum btrfs_util_error err;
+   bool root;
 
optind = 0;
while (1) {
@@ -1947,6 +1956,12 @@ static int cmd_subvol_show(int argc, char **argv)
usage(cmd_subvol_show_usage);
}
 
+   root = is_root();
+   if (!root && (by_rootid || by_uuid)) {
+   error("only root can use -r or -u options");
+   return -1;
+   }
+
fullpath = realpath(argv[optind], NULL);
if (!fullpath) {
error("cannot find real path for '%s': %m", argv[optind]);
@@ -2001,19 +2016,53 @@ static int cmd_subvol_show(int argc, char **argv)
goto out;
   

[PATCH 1/8] btrfs-progs: sub list: Use libbtrfsutil for subvolume list

2018-11-26 Thread Misono Tomohiro
This is a copy of the following non-merged patch originally written
by Omar Sandoval:
  btrfs-progs: use libbtrfsutil for subvolume list
except that this commit keeps the libbtrfs implementation which the above
commit tries to remove (therefore this adds the suffix _v2 to structs/functions).

Original Author: Omar Sandoval 
Signed-off-by: Misono Tomohiro 
Signed-off-by: David Sterba 
---
 cmds-subvolume.c | 963 +--
 1 file changed, 936 insertions(+), 27 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index a8395aac..84a03fd8 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -407,6 +407,915 @@ keep_fd:
return ret;
 }
 
+#define BTRFS_LIST_NFILTERS_INCREASE   (2 * BTRFS_LIST_FILTER_MAX)
+#define BTRFS_LIST_NCOMPS_INCREASE (2 * BTRFS_LIST_COMP_MAX)
+
+struct listed_subvol {
+   struct btrfs_util_subvolume_info info;
+   char *path;
+};
+
+struct subvol_list {
+   size_t num;
+   struct listed_subvol subvols[];
+};
+
+typedef int (*btrfs_list_filter_func_v2)(struct listed_subvol *, uint64_t);
+typedef int (*btrfs_list_comp_func_v2)(const struct listed_subvol *,
+   const struct listed_subvol *,
+   int);
+
+struct btrfs_list_filter_v2 {
+   btrfs_list_filter_func_v2 filter_func;
+   u64 data;
+};
+
+struct btrfs_list_comparer_v2 {
+   btrfs_list_comp_func_v2 comp_func;
+   int is_descending;
+};
+
+struct btrfs_list_filter_set_v2 {
+   int total;
+   int nfilters;
+   int only_deleted;
+   struct btrfs_list_filter_v2 filters[0];
+};
+
+struct btrfs_list_comparer_set_v2 {
+   int total;
+   int ncomps;
+   struct btrfs_list_comparer_v2 comps[0];
+};
+
+static struct {
+   char*name;
+   char*column_name;
+   int need_print;
+} btrfs_list_columns[] = {
+   {
+   .name   = "ID",
+   .column_name= "ID",
+   .need_print = 0,
+   },
+   {
+   .name   = "gen",
+   .column_name= "Gen",
+   .need_print = 0,
+   },
+   {
+   .name   = "cgen",
+   .column_name= "CGen",
+   .need_print = 0,
+   },
+   {
+   .name   = "parent",
+   .column_name= "Parent",
+   .need_print = 0,
+   },
+   {
+   .name   = "top level",
+   .column_name= "Top Level",
+   .need_print = 0,
+   },
+   {
+   .name   = "otime",
+   .column_name= "OTime",
+   .need_print = 0,
+   },
+   {
+   .name   = "parent_uuid",
+   .column_name= "Parent UUID",
+   .need_print = 0,
+   },
+   {
+   .name   = "received_uuid",
+   .column_name= "Received UUID",
+   .need_print = 0,
+   },
+   {
+   .name   = "uuid",
+   .column_name= "UUID",
+   .need_print = 0,
+   },
+   {
+   .name   = "path",
+   .column_name= "Path",
+   .need_print = 0,
+   },
+   {
+   .name   = NULL,
+   .column_name= NULL,
+   .need_print = 0,
+   },
+};
+
+static btrfs_list_filter_func_v2 all_filter_funcs[];
+static btrfs_list_comp_func_v2 all_comp_funcs[];
+
+static void btrfs_list_setup_print_column_v2(enum btrfs_list_column_enum 
column)
+{
+   int i;
+
+   ASSERT(0 <= column && column <= BTRFS_LIST_ALL);
+
+   if (column < BTRFS_LIST_ALL) {
+   btrfs_list_columns[column].need_print = 1;
+   return;
+   }
+
+   for (i = 0; i < BTRFS_LIST_ALL; i++)
+   btrfs_list_columns[i].need_print = 1;
+}
+
+static int comp_entry_with_rootid_v2(const struct listed_subvol *entry1,
+ const struct listed_subvol *entry2,
+ int is_descending)
+{
+   int ret;
+
+   if (entry1->info.id > entry2->info.id)
+   ret = 1;
+   else if (entry1->info.id < entry2->info.id)
+   ret = -1;
+   else
+   ret = 0;
+
+   return is_descending ? -ret : ret;
+}
+
+static int comp_entry_with_gen_v2(const struct listed_subvol *entry1,
+  const struct listed_subvol *entry2,
+  int is_descending)
+{
+   int ret;
+
+   if (entry1->info.generation > entry2->info.generation)
+   ret = 1;
+

[PATCH 8/8] btrfs-progs: test: Add cli-test/009 to check subvolume list for both root and normal user

2018-11-26 Thread Misono Tomohiro
Signed-off-by: Misono Tomohiro 
Signed-off-by: David Sterba 
---
 tests/cli-tests/009-subvolume-list/test.sh | 130 +
 1 file changed, 130 insertions(+)
 create mode 100755 tests/cli-tests/009-subvolume-list/test.sh

diff --git a/tests/cli-tests/009-subvolume-list/test.sh 
b/tests/cli-tests/009-subvolume-list/test.sh
new file mode 100755
index ..50b7eb6b
--- /dev/null
+++ b/tests/cli-tests/009-subvolume-list/test.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# test for "subvolume list" both for root and normal user
+
+source "$TEST_TOP/common"
+
+check_testuser
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+prepare_test_dev
+
+
+# test if the ids returned by "sub list" match expected ids
+# $1  ... indicate run as root or test user
+# $2  ... PATH to be specified by sub list command
+# $3~ ... expected return ids
+test_list()
+{
+   local SUDO
+   if [ $1 -eq 1 ]; then
+   SUDO=$SUDO_HELPER
+   else
+   SUDO="sudo -u progs-test"
+   fi
+
+   result=$(run_check_stdout $SUDO "$TOP/btrfs" subvolume list "$2" | \
+   awk '{print $2}' | xargs | sort -n)
+
+   shift
+   shift
+   expected=($(echo "$@" | tr " " "\n" | sort -n))
+   expected=$(IFS=" "; echo "${expected[*]}")
+
+   if [ "$result" != "$expected" ]; then
+   echo "result  : $result"
+   echo "expected: $expected"
+   _fail "ids returned by sub list does not match expected ids"
+   fi
+}
+
+run_check $SUDO_HELPER "$TOP/mkfs.btrfs" -f "$TEST_DEV"
+run_check_mount_test_dev
+cd "$TEST_MNT"
+
+# create subvolumes and directories and make some non-readable
+# by user 'progs-test'
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub1
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub1/subsub1
+run_check $SUDO_HELPER mkdir sub1/dir
+
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub2
+run_check $SUDO_HELPER mkdir -p sub2/dir/dirdir
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub2/dir/subsub2
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub2/dir/dirdir/subsubX
+
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub3
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub3/subsub3
+run_check $SUDO_HELPER mkdir sub3/dir
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub3/dir/subsubY
+run_check $SUDO_HELPER chmod o-r sub3
+
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub4
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub4/subsub4
+run_check $SUDO_HELPER mkdir sub4/dir
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub4/dir/subsubZ
+run_check $SUDO_HELPER setfacl -m u:progs-test:- sub4/dir
+
+run_check $SUDO_HELPER touch "file"
+
+# expected result for root at mount point:
+#
+# ID 256 gen 8 top level 5 path sub1
+# ID 258 gen 7 top level 256 path sub1/subsub1
+# ID 259 gen 10 top level 5 path sub2
+# ID 260 gen 9 top level 259 path sub2/dir/subsub2
+# ID 261 gen 10 top level 259 path sub2/dir/dirdir/subsubX
+# ID 262 gen 14 top level 5 path sub3
+# ID 263 gen 12 top level 262 path sub3/subsub3
+# ID 264 gen 13 top level 262 path sub3/dir/subsubY
+# ID 265 gen 17 top level 5 path sub4
+# ID 266 gen 15 top level 265 path sub4/subsub4
+# ID 267 gen 16 top level 265 path sub4/dir/subsubZ
+
+# check for root for both absolute/relative path
+all=(256 258 259 260 261 262 263 264 265 266 267)
+test_list 1 "$TEST_MNT" "${all[@]}"
+test_list 1 "$TEST_MNT/sub1" "256 258"
+run_mustfail "should raise invalid argument error" \
+   sudo "$TOP/btrfs" subvolume list "$TEST_MNT/sub1/dir"
+test_list 1 "$TEST_MNT/sub2" "259 260 261"
+test_list 1 "$TEST_MNT/sub3" "262 263 264"
+test_list 1 "$TEST_MNT/sub4" "265 266 267"
+run_mustfail "should fail for file" \
+   $SUDO_HELPER "$TOP/btrfs" subvolume list "$TEST_MNT/file"
+
+test_list 1 "." "${all[@]}"
+test_list 1 "sub1" "256 258"
+run_mustfail "should raise invalid argument error" \
+   sudo "$TOP/btrfs" subvolume list "sub1/dir"
+test_list 1 "sub2" "259 260 261"
+test_list 1 "sub3" "262 263 264"
+test_list 1 "sub4" "265 266 267"
+run_mustfail "should fail for file" \
+   $SUDO_HELPER "$TOP/btrfs" subvolume list "file"
+
+# check for normal user for both absolute/relative path
+test_list 0 "$TEST_MNT" "256 258 259 260 261 265 266"
+test_list 0 &

[PATCH 7/8] btrfs-progs: test: Add helper function to check if test user exists

2018-11-26 Thread Misono Tomohiro
Add a helper function to check whether the user 'nobody' exists.
Note that 'nobody' should not have root privileges as it will be used
to test the behavior of a non-privileged user.

Signed-off-by: Misono Tomohiro 
Signed-off-by: David Sterba 
---
 tests/common | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/tests/common b/tests/common
index 1575ae38..16693ca7 100644
--- a/tests/common
+++ b/tests/common
@@ -314,6 +314,16 @@ check_global_prereq()
fi
 }
 
+check_testuser()
+{
+   id -u nobody > /dev/null 2>&1
+   if [ $? -ne 0 ]; then
+   _not_run "Need to add user \"nobody\""
+   fi
+   # Note that nobody should not have root privileges
+   # otherwise test may not run as expected
+}
+
 check_image()
 {
local image
-- 
2.19.1




[PATCH 4/8] btrfs-progs: sub list: Update -a option and remove meaningless filter

2018-11-26 Thread Misono Tomohiro
Currently, the -a option adds a filter and changes the subvolume path as follows:
  - If a subvolume is a child of the specified path, nothing changes
  - otherwise, adds <FS_TREE> to the head of the path

This is rather meaningless, so let's remove this filter.

As a result, the behavior of the -a option becomes the same as
the default behavior of sub list in progs <= 4.19

[Example]
 $ mkfs.btrfs -f $DEV
 $ mount $DEV /mnt

 $ btrfs subvolume create /mnt/AAA
 $ btrfs subvolume create /mnt/AAA/BBB
 $ btrfs subvolume create /mnt/ZZZ

 $ btrfs subvolume list -a /mnt
 ID 256 gen 9 top level 5 path AAA
 ID 257 gen 9 top level 256 path AAA/BBB
 ID 258 gen 10 top level 5 path ZZZ

 ** output of progs <= 4.19
 $ btrfs subvolume list -a /mnt
 ID 256 gen 9 top level 5 path AAA
 ID 257 gen 9 top level 256 path <FS_TREE>/AAA/BBB
 ID 258 gen 10 top level 5 path ZZZ

Signed-off-by: Misono Tomohiro 
Signed-off-by: David Sterba 
---
 Documentation/btrfs-subvolume.asciidoc |  6 +++--
 cmds-subvolume.c   | 35 +++---
 2 files changed, 8 insertions(+), 33 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index 99fff977..428a2faa 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -118,8 +118,10 @@ Path filtering;;
 -o
 print only subvolumes below specified .
 -a
-print all the subvolumes in the filesystem and distinguish between
-absolute and relative path with respect to the given .
+print all the subvolumes in the filesystem, including subvolumes
+which cannot be accessed from current mount point.
+path to be shown is relative to the top-level subvolume
+(require root privileges).
 
 Field selection;;
 -p
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index ef613662..cd2e4425 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -761,28 +761,6 @@ static int filter_topid_equal(struct listed_subvol 
*subvol, uint64_t data)
return subvol->info.parent_id == data;
 }
 
-static int filter_full_path(struct listed_subvol *subvol, uint64_t data)
-{
-   /*
-* This implements the same behavior as before the conversion to
-* libbtrfsutil, which is mostly nonsensical.
-*/
-   if (subvol->info.parent_id != data) {
-   char *tmp;
-   int ret;
-
-   ret = asprintf(, "/%s", subvol->path);
-   if (ret == -1) {
-   error("out of memory");
-   exit(1);
-   }
-
-   free(subvol->path);
-   subvol->path = tmp;
-   }
-   return 1;
-}
-
 static int filter_by_parent(struct listed_subvol *subvol, uint64_t data)
 {
return !uuid_compare(subvol->info.parent_uuid,
@@ -800,7 +778,6 @@ static btrfs_list_filter_func_v2 all_filter_funcs[] = {
[BTRFS_LIST_FILTER_CGEN_LESS]   = filter_cgen_less,
[BTRFS_LIST_FILTER_CGEN_EQUAL]  = filter_cgen_equal,
[BTRFS_LIST_FILTER_TOPID_EQUAL] = filter_topid_equal,
-   [BTRFS_LIST_FILTER_FULL_PATH]   = filter_full_path,
[BTRFS_LIST_FILTER_BY_PARENT]   = filter_by_parent,
 };
 
@@ -1411,9 +1388,9 @@ static const char * const cmd_subvol_list_usage[] = {
"",
"Path filtering:",
"-o   print only subvolumes below specified path",
-   "-a   print all the subvolumes in the filesystem and",
-   " distinguish absolute and relative path with respect",
-   " to the given  (require root privileges)",
+   "-a   print all the subvolumes in the filesystem.",
+   " path to be shown is relative to the top-level",
+   " subvolume (require root privileges)",
"",
"Field selection:",
"-p   print parent ID",
@@ -1581,11 +1558,7 @@ static int cmd_subvol_list(int argc, char **argv)
if (ret)
goto out;
 
-   if (is_list_all)
-   btrfs_list_setup_filter_v2(_set,
-   BTRFS_LIST_FILTER_FULL_PATH,
-   top_id);
-   else if (is_only_in_path)
+   if (is_only_in_path)
btrfs_list_setup_filter_v2(_set,
BTRFS_LIST_FILTER_TOPID_EQUAL,
top_id);
-- 
2.19.1




[PATCH 5/8] btrfs-progs: utils: Fallback to open without O_NOATIME flag in find_mount_root():

2018-11-26 Thread Misono Tomohiro
The O_NOATIME flag requires that the effective UID of the process matches the
file's owner or that the process has the CAP_FOWNER capability. Fall back to
open without the O_NOATIME flag so that a non-privileged user can also call
find_mount_root().

This is a preparation work to allow non-privileged user to call
"subvolume show".

Signed-off-by: Misono Tomohiro 
Signed-off-by: David Sterba 
---
 utils.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/utils.c b/utils.c
index 3a4bc92a..8ce8417d 100644
--- a/utils.c
+++ b/utils.c
@@ -2054,6 +2054,9 @@ int find_mount_root(const char *path, char **mount_root)
char *longest_match = NULL;
 
fd = open(path, O_RDONLY | O_NOATIME);
+   if (fd < 0 && errno == EPERM)
+   fd = open(path, O_RDONLY);
+
if (fd < 0)
return -errno;
close(fd);
-- 
2.19.1




[PATCH RESEND 0/8] btrfs-progs: sub: Relax the privileges of "subvolume list/show"

2018-11-26 Thread Misono Tomohiro
Hello,

This is basically a resend of
  "[PATCH v2 00/20] btrfs-progs: Rework of "subvolume list/show" and relax the
root privileges of them" [1]
which I submitted in June. The aim of this series is to allow a non-privileged
user to use basic subvolume functionality (create/list/snapshot/delete; this
series allows "list").

They were once in the devel branch with some whitespace/comment modifications
by David. I rebased them onto the current devel branch.

github: https://github.com/t-msn/btrfs-progs/tree/rework-sub-list

Basic logic/code is the same as before. Some differences are:
 - Use latest libbtrfsutil from Omar [2] (thus drop first part of patches).
   As a result, "sub list" no longer accepts an ordinary directory
   (which was allowed in the previous version)
 - Drop patches which add new options to "sub list"
 - Use 'nobody' as non-privileged test user just like libbtrfsutil test
 - Update comments

Importantly, in order to make the output consistent for both root and
non-privileged users, this changes the behavior of "subvolume list":
 - (default) Only list subvolumes under the specified path.
   The path needs to be a subvolume.
 - (-a) The filter is dropped, i.e. its output is the same as the
   default behavior of "sub list" in progs <= 4.19

Therefore, existing scripts may need to be updated to add the -a option
(I believe nobody uses the current -a option).
If anyone thinks this is not good, please let me know.

Behavior summary from cover letter in [1]

* Behavior summary of new "sub list"
  - default (no option)
- lists subvolumes below the specified path (inc. path itself)
- If new ioctls exists non-privileged user can call it
(subvolumes to which the user cannot access will be skipped)

  - -a
- updated to remove filter. i.e. the output is the same as current progs
  without option (require root privileges)

 [Example]
  $ mkfs.btrfs -f $DEV
  $ mkfs.btrfs -f $DEV2
  $ mount $DEV $MNT

  $ btrfs subvolume create $MNT/AAA
  $ btrfs subvolume create $MNT/BBB
  $ btrfs subvolume create $MNT/CCC
  $ btrfs subvolume create $MNT/DDD
  $ mkdir $MNT/AAA/bbb
  $ mkdir $MNT/AAA/ccc
  $ mkdir $MNT/AAA/other

  $ umount $MNT
  $ mount -o subvol=AAA $DEV $MNT
  $ mount -o subvol=BBB $DEV $MNT/bbb
  $ mount -o subvol=CCC $DEV $MNT/ccc
  $ mount $DEV2 $MNT/other

  $ btrfs subvolume list $MNT # print subvolumes below the path
  ID 256 gen 10 top level 5 path .

  $ btrfs subvolume list -a $MNT
  # print all subvolumes in the fs. the same output as progs<=4.19 without 
option
  ID 256 gen 10 top level 5 path AAA
  ID 258 gen 7 top level 5 path BBB
  ID 259 gen 8 top level 5 path CCC
  ID 260 gen 9 top level 5 path DDD

* Behavior summary of new "sub show"
  - No change for root's output
  - If new ioctls exists, non-privileged user can call it
- In that case, path to be shown is absolute path
  (for root, it is relative to top-level subvolume)
  Also, snapshots to be shown are to which the user can access from current
mount point.
  (for root, all snapshots in the fs)
===

[1] 
https://lore.kernel.org/linux-btrfs/cover.1529310485.git.misono.tomoh...@jp.fujitsu.com/
[2] https://lore.kernel.org/linux-btrfs/cover.1542181521.git.osan...@fb.com/

Thanks,
Misono

Misono Tomohiro (8):
  btrfs-progs: sub list: Use libbtrfsutil for subvolume list
  btrfs-progs: sub list: factor out main part of btrfs_list_subvols
  btrfs-progs: sub list: Change the default behavior of "subvolume list"
and allow non-privileged user to call it
  btrfs-progs: sub list: Update -a option and remove meaningless filter
  btrfs-progs: utils: Fallback to open without O_NOATIME flag in
find_mount_root():
  btrfs-progs: sub show: Allow non-privileged user to call "subvolume
show"
  btrfs-progs: test: Add helper function to check if test user exists
  btrfs-progs: test: Add cli-test/009 to check subvolume list for both
root and normal user

 Documentation/btrfs-subvolume.asciidoc |   25 +-
 cmds-subvolume.c   | 1149 +++-
 tests/cli-tests/009-subvolume-list/test.sh |  130 +++
 tests/common   |   10 +
 utils.c|3 +
 5 files changed, 1266 insertions(+), 51 deletions(-)
 create mode 100755 tests/cli-tests/009-subvolume-list/test.sh

-- 
2.19.1




[PATCH 3/8] btrfs-progs: sub list: Change the default behavior of "subvolume list" and allow non-privileged user to call it

2018-11-26 Thread Misono Tomohiro
Change the default behavior of "subvolume list" and allow non-privileged
users to call it as well.

From this commit, by default it only lists subvolumes under the specified
path (incl. the path itself, except for the top-level subvolume; the path
needs to be a subvolume). Also, if the kernel supports the new ioctls
(BTRFS_IOC_GET_SUBVOL_INFO/BTRFS_IOC_GET_SUBVOL_ROOTREF and
BTRFS_IOC_INO_LOOKUP_USER, which are available from 4.18), a non-privileged
user can also use the "sub list" command (subvolumes to which the user does
not have access rights will be skipped).

Note that the root user can list all the subvolumes in the fs with the -a option.

[Example]
 $ mkfs.btrfs -f $DEV
 $ mount $DEV /mnt

 $ btrfs subvolume create /mnt/AAA
 $ btrfs subvolume create /mnt/AAA/BBB
 $ mkdir /mnt/AAA/BBB/dir
 $ btrfs subvolume create /mnt/AAA/BBB/dir/CCC
 $ btrfs subvolume create /mnt/ZZZ

 $ umount /mnt
 $ mount -o subvol=AAA $DEV /mnt

 $ btrfs subvolume list /mnt
 ID 256 gen 11 top level 5 path .
 ID 257 gen 8 top level 256 path BBB
 ID 258 gen 8 top level 257 path BBB/dir/CCC

 $ btrfs subvolume list /mnt/BBB
 ID 257 gen 8 top level 256 path .
 ID 258 gen 8 top level 257 path dir/CCC

 ** output of progs <= 4.19
 $ mount -o subvol=AAA $DEV /mnt
 $ btrfs subvolume list /mnt
 ID 256 gen 11 top level 5 path AAA
 ID 257 gen 8 top level 256 path BBB
 ID 258 gen 8 top level 257 path BBB/dir/CCC
 ID 259 gen 11 top level 256 path ZZZ

Signed-off-by: Misono Tomohiro 
Signed-off-by: David Sterba 
---
 Documentation/btrfs-subvolume.asciidoc |   8 +-
 cmds-subvolume.c   | 144 +++--
 2 files changed, 119 insertions(+), 33 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index f3eb4e26..99fff977 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -95,6 +95,12 @@ The output format is similar to *subvolume list* command.
 
 *list* [options] [-G [\+|-]] [-C [+|-]] 
[--sort=rootid,gen,ogen,path] ::
 List the subvolumes present in the filesystem .
+By default, this only lists the subvolumes under ,
+including  itself (except top-level subvolume).
++
+This command had required root privileges. From kernel 4.18,
+non privileged user can call this too. Also from kernel 4.18,
+It is possible to specify non-subvolume directory as .
 +
 For every subvolume the following information is shown by default:
 +
@@ -102,7 +108,7 @@ ID  gen  top level  path 
 +
 where ID is subvolume's id, gen is an internal counter which is updated
 every transaction, top level is the same as parent subvolume's id, and
-path is the relative path of the subvolume to the top level subvolume.
+path is the relative path of the subvolume to the specified path.
 The subvolume's ID may be used by the subvolume set-default command,
 or at mount time via the subvolid= option.
 +
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 40cc2687..ef613662 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -1128,9 +1128,37 @@ out:
return subvols;
 }
 
+static int add_subvol(struct subvol_list **subvols,
+ struct listed_subvol *subvol,
+ size_t *capacity)
+{
+   if ((*subvols)->num >= *capacity) {
+   struct subvol_list *new_subvols;
+   size_t new_capacity = max_t(size_t, 1, *capacity * 2);
+
+   new_subvols = realloc(*subvols,
+ sizeof(*new_subvols) +
+ new_capacity *
+ sizeof(new_subvols->subvols[0]));
+   if (!new_subvols) {
+   error("out of memory");
+   return -1;
+   }
+
+   *subvols = new_subvols;
+   *capacity = new_capacity;
+   }
+
+   (*subvols)->subvols[(*subvols)->num] = *subvol;
+   (*subvols)->num++;
+
+   return 0;
+}
+
 static void get_subvols_info(struct subvol_list **subvols,
 struct btrfs_list_filter_set_v2 *filter_set,
 int fd,
+int tree_id,
 size_t *capacity)
 {
struct btrfs_util_subvolume_iterator *iter;
@@ -1138,7 +1166,7 @@ static void get_subvols_info(struct subvol_list **subvols,
int ret = -1;
 
err = btrfs_util_create_subvolume_iterator_fd(fd,
- BTRFS_FS_TREE_OBJECTID, 0,
+ tree_id, 0,
  );
if (err) {
iter = NULL;
@@ -1146,6 +1174,52 @@ static void get_subvols_info(struct subvol_list 
**subvols,
goto out;
}
 
+   /*
+* Subvolume iterator does not include the information of the
+* specified path/fd. So, add it first.
+

[PATCH 2/8] btrfs-progs: sub list: factor out main part of btrfs_list_subvols

2018-11-26 Thread Misono Tomohiro
No functional changes.
This is a preparation work for reworking "subvolume list".

Signed-off-by: Misono Tomohiro 
Signed-off-by: David Sterba 
---
 cmds-subvolume.c | 50 +---
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 84a03fd8..40cc2687 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -1128,22 +1128,15 @@ out:
return subvols;
 }
 
-static struct subvol_list *btrfs_list_subvols(int fd,
- struct btrfs_list_filter_set_v2 
*filter_set)
+static void get_subvols_info(struct subvol_list **subvols,
+struct btrfs_list_filter_set_v2 *filter_set,
+int fd,
+size_t *capacity)
 {
-   struct subvol_list *subvols;
-   size_t capacity = 0;
struct btrfs_util_subvolume_iterator *iter;
enum btrfs_util_error err;
int ret = -1;
 
-   subvols = malloc(sizeof(*subvols));
-   if (!subvols) {
-   error("out of memory");
-   return NULL;
-   }
-   subvols->num = 0;
-
err = btrfs_util_create_subvolume_iterator_fd(fd,
  BTRFS_FS_TREE_OBJECTID, 0,
  );
@@ -1171,11 +1164,11 @@ static struct subvol_list *btrfs_list_subvols(int fd,
continue;
}
 
-   if (subvols->num >= capacity) {
+   if ((*subvols)->num >= *capacity) {
struct subvol_list *new_subvols;
-   size_t new_capacity = max_t(size_t, 1, capacity * 2);
+   size_t new_capacity = max_t(size_t, 1, *capacity * 2);
 
-   new_subvols = realloc(subvols,
+   new_subvols = realloc(*subvols,
  sizeof(*new_subvols) +
  new_capacity *
  sizeof(new_subvols->subvols[0]));
@@ -1184,12 +1177,12 @@ static struct subvol_list *btrfs_list_subvols(int fd,
goto out;
}
 
-   subvols = new_subvols;
-   capacity = new_capacity;
+   *subvols = new_subvols;
+   *capacity = new_capacity;
}
 
-   subvols->subvols[subvols->num] = subvol;
-   subvols->num++;
+   (*subvols)->subvols[(*subvols)->num] = subvol;
+   (*subvols)->num++;
}
 
ret = 0;
@@ -1197,9 +1190,26 @@ out:
if (iter)
btrfs_util_destroy_subvolume_iterator(iter);
if (ret) {
-   free_subvol_list(subvols);
-   subvols = NULL;
+   free_subvol_list(*subvols);
+   *subvols = NULL;
+   }
+}
+
+static struct subvol_list *btrfs_list_subvols(int fd,
+ struct btrfs_list_filter_set_v2 
*filter_set)
+{
+   struct subvol_list *subvols;
+   size_t capacity = 0;
+
+   subvols = malloc(sizeof(*subvols));
+   if (!subvols) {
+   error("out of memory");
+   return NULL;
}
+   subvols->num = 0;
+
+   get_subvols_info(, filter_set, fd, );
+
return subvols;
 }
 
-- 
2.19.1




Re: [RFC PATCH 1/6] btrfs-progs: splitcmd-gen.sh: create btrfs- binaries for selected subcommands

2018-08-29 Thread Misono Tomohiro
On 2018/08/30 2:24, Axel Burri wrote:
> Create separate binaries for each subcommand ("btrfs foo bar").
> Least invasive approach, generate c-files for each command:
> 
> # ./splitcmd-gen.sh
> # make V=1 btrfs-subvolume-show
> # make V=1 btrfs-send
> # [...]
> 
> Alternative approach: instead of including the c-file, link with obj
> in Makefile, e.g.:
> 
> btrfs_subvolume_show_objects = cmds-subvolume.o
> btrfs_send_objects = cmds-send.o
> [...]
> 
> This implies adaptions in cmds-subvolume.c (and others):
> 
> -static int cmd_filesystem_show(int argc, char **argv)
> +int cmd_filesystem_show(int argc, char **argv)
> 
> If they are defined non-static, we could probably simplify further and
> add `-Wl,-eentry` flags (changing entry point from "main" to "entry").
> 
> With this, and if handle_command_group() was declared in some library
> instead of btrfs.c, we would get rid of generated files completely.
> 
> Signed-off-by: Axel Burri 
> ---
>  splitcmd-gen.sh | 70 
> +
>  splitcmd.c.in   | 17 ++
>  2 files changed, 87 insertions(+)
>  create mode 100755 splitcmd-gen.sh
>  create mode 100644 splitcmd.c.in
> 
> diff --git a/splitcmd-gen.sh b/splitcmd-gen.sh
> new file mode 100755
> index ..4d2e0509
> --- /dev/null
> +++ b/splitcmd-gen.sh
> @@ -0,0 +1,70 @@
> +#!/bin/bash
> +
> +#
> +# Generate c-files for btrfs subcommands defined below
> +#
> +
> +# Notes on linux capabilities:
> +#
> +# btrfs-subvolume-show, btrfs-subvolume-list, btrfs-send:
> +#  - CAP_FOWNER is only needed for O_NOATIME flag in open() system calls
> +#  - why CAP_SYS_ADMIN? shouldn't CAP_DAC_READ_SEARCH be enough?

Hello,

Not directly related to this series, just FYI:
I'm working on allowing non-privileged users to run sub show/list as long
as they can access the subvolume:
  https://www.spinics.net/lists/linux-btrfs/msg79285.html

Hopefully this will be merged to master in the near future
(any comments from users/devs are welcome).

Thanks,
Misono

> +#
> +# btrfs-receive:
> +#  - dependent on send-stream (see cmds-receive.c: "send_ops"):
> +#CAP_CHOWN, CAP_MKNOD, CAP_SETFCAP (for "lsetxattr")
> +#
> +# btrfs-filesystem-usage:
> +#  - CAP_SYS_ADMIN is for BTRFS_IOC_TREE_SEARCH and BTRFS_IOC_FS_INFO
> +#in order to provide full level of detail, see btrfs-filesystem(8)
> +
> +
> +makefile_out="Makefile.install_setcap"
> +
> +splitcmd_list=""
> +setcap_lines=""
> +
> +function gen_splitcmd {
> +local name="$1"
> +local dest="${1}.c"
> +local cfile="$2"
> +local entry="$3"
> +local caps="$4"
> +echo "generating: ${dest} (cfile=${cfile}, entry=${entry})"
> +echo -e "/*\n * ${name}\n *\n * GENERATED BY splitcmd-gen.sh\n */\n" > 
> $dest
> +sed -e "s|@BTRFS_SPLITCMD_CFILE_INCLUDE@|${cfile}|g" \
> +-e "s|@BTRFS_SPLITCMD_ENTRY@|${entry}|g" \
> +splitcmd.c.in >> $dest
> +}
> +
> +gen_splitcmd "btrfs-subvolume-show" \
> + "cmds-subvolume.c" "cmd_subvol_show" \
> + "cap_sys_admin,cap_fowner,cap_dac_read_search"
> +
> +gen_splitcmd "btrfs-subvolume-list" \
> + "cmds-subvolume.c" "cmd_subvol_list" \
> + "cap_sys_admin,cap_fowner,cap_dac_read_search"
> +
> +gen_splitcmd "btrfs-subvolume-snapshot" \
> + "cmds-subvolume.c" "cmd_subvol_snapshot" \
> + "cap_sys_admin,cap_fowner,cap_dac_override,cap_dac_read_search"
> +
> +gen_splitcmd "btrfs-subvolume-delete" \
> + "cmds-subvolume.c" "cmd_subvol_delete" \
> + "cap_sys_admin,cap_dac_override"
> +
> +gen_splitcmd "btrfs-send" \
> + "cmds-send.c" "cmd_send" \
> + "cap_sys_admin,cap_fowner,cap_dac_read_search"
> +
> +gen_splitcmd "btrfs-receive" \
> + "cmds-receive.c" "cmd_receive" \
> + 
> "cap_sys_admin,cap_fowner,cap_chown,cap_mknod,cap_setfcap,cap_dac_override,cap_dac_read_search"
> +
> +gen_splitcmd "btrfs-filesystem-usage" \
> + "cmds-fi-usage.c" "cmd_filesystem_usage" \
> + "cap_sys_admin"
> +
> +gen_splitcmd "btrfs-qgroup-destroy" \
> + "cmds-qgroup.c" "cmd_qgroup_destroy" \
> + "cap_sys_admin,cap_dac_override"
> diff --git a/splitcmd.c.in b/splitcmd.c.in
> new file mode 100644
> index ..aa07af9a
> --- /dev/null
> +++ b/splitcmd.c.in
> @@ -0,0 +1,17 @@
> +#include "@BTRFS_SPLITCMD_CFILE_INCLUDE@"
> +
> +/*
> + * Dummy object: used from second-level command groups (e.g. in
> + * "cmds-subvolume.c"), is never called in splitcmd executables.
> + */
> +int handle_command_group(const struct cmd_group *grp, int argc,
> +  char **argv)
> +{
> + exit(1);
> +}
> +
> +
> +int main(int argc, char **argv)
> +{
> + return @BTRFS_SPLITCMD_ENTRY@(argc, argv);
> +}
> 



[PATCH] btrfs: extent-tree.c: Remove redundant variable from btrfs_cross_ref_exist()

2018-08-29 Thread Misono Tomohiro
Since commit d7df2c796d7e ("Btrfs attach delayed ref updates to
delayed ref heads"), check_delayed_ref() won't return -ENOENT.

In btrfs_cross_ref_exist(), two variables 'ret' and 'ret2' were
originally used to handle the -ENOENT error case.

Since that code is not needed anymore, let's just remove 'ret2'.

Signed-off-by: Misono Tomohiro 
---
 fs/btrfs/extent-tree.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2d9074295d7f..0c87472d5719 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3139,7 +3139,6 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 
objectid, u64 offset,
 {
struct btrfs_path *path;
int ret;
-   int ret2;
 
path = btrfs_alloc_path();
if (!path)
@@ -3151,17 +3150,10 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 
objectid, u64 offset,
if (ret && ret != -ENOENT)
goto out;
 
-   ret2 = check_delayed_ref(root, path, objectid,
+   ret = check_delayed_ref(root, path, objectid,
 offset, bytenr);
-   } while (ret2 == -EAGAIN);
+   } while (ret == -EAGAIN);
 
-   if (ret2 && ret2 != -ENOENT) {
-   ret = ret2;
-   goto out;
-   }
-
-   if (ret != -ENOENT || ret2 != -ENOENT)
-   ret = 0;
 out:
btrfs_free_path(path);
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
-- 
2.17.1




Re: [PATCH v2] btrfs: Always check nocow for quota enabled case to make sure we won't reserve unnecessary data space

2018-08-29 Thread Misono Tomohiro
On 2018/08/28 14:21, Qu Wenruo wrote:
> On 2018/8/24 下午4:09, Misono Tomohiro wrote:
> [snip]
>>>
>>> BTW, what's the possibility of such problem in your test environment?
>>
>> It's like one in several times.
>> It may depend on hardware performance? (the machine is not so fast),
>>
>> I also noticed following warning happens too (not always):
>>
> 
> After digging into the case, it's more complex than just my patch.
> 
> Firstly, we lack a lot of underflow checks when modifying bytes_may_use.
> So we need to do all the underflow detection for every modifier of
> bytes_may_use.
> 
> Secondly, btrfs_cross_ref_exist() check makes NODATACOW check in
> __btrfs_buffered_write() unreliable.
> 
> For the following case, at __btrfs_buffered_write() time we're pretty
> sure we could do NODATACOW, but at sync time, due to cloned range,
> btrfs_cross_ref_exist() would detect reflinked prealloc extent, then
> falls back to CoW, and finally cause bytes_may_use underflow:
> 
> ---
> mkfs.btrfs -f $dev > $full_log
> 
> mount $dev $mnt -o nospace_cache
> btrfs quota enable $mnt
> btrfs quota rescan -w $mnt
> 
> xfs_io -f -c "falloc 0 2M" $mnt/file1 > /dev/null
> xfs_io -c "pwrite -b 1M 0 1M" $mnt/file1 > /dev/null
> xfs_io -c "reflink $mnt/file1 1M 4M 1M" $mnt/file1 > /dev/null
> sync
> 

Thanks for the explanation. I will try to understand the relevant code.

> 
> Even without my patch, the "pwrite" command is still CoWed, which could
> be avoided.
> And that's the reason my patch is causing the underflow.
> 
> To fix this, we need more accurate btrfs_cross_ref_exist() check, not
> only for @disk_bytenr but also check @len.
> 
> Or we could try to flush the whole inode in clone_range() so we could go
> through NOCOW routine before clone really happens.

So, as your RFC patch does not work, the option is the first one?

Thanks,
Misono

> 
> Thanks,
> Qu
> 



Re: [PATCH v2] btrfs: Always check nocow for quota enabled case to make sure we won't reserve unnecessary data space

2018-08-24 Thread Misono Tomohiro
On 2018/08/24 16:58, Qu Wenruo wrote:
> 
> 
> On 2018/8/24 下午3:54, Misono Tomohiro wrote:
>> On 2018/08/24 16:20, Qu Wenruo wrote:
>>>
>>>
>>> On 2018/8/24 下午3:14, Misono Tomohiro wrote:
>>>> Hi,
>>>>
>>>> On 2018/08/21 14:40, Qu Wenruo wrote:
>>>>> Commit c6887cd11149 ("Btrfs: don't do nocow check unless we have to")
>>>>> makes nocow check less frequent to improve performance.
>>>>>
>>>>> However for quota enabled case, such optimization could lead to extra
>>>>> unnecessary data reservation, which results failure for test case like
>>>>> btrfs/153 in fstests.
>>>>>
>>>>> Fix it by reverting to old behavior for quota enabled case.
>>>>>
>>>>> Fixes: c6887cd11149 ("Btrfs: don't do nocow check unless we have to")
>>>>> Signed-off-by: Qu Wenruo 
>>>>> ---
>>>>> changelog
>>>>> v2:
>>>>>   Fix regression for quota+cow case. (Previously it will skip data
>>>>>   reservation if quota is enabled, causing regression for limit case.
>>>>>   Pointed out by Misono)
>>>>> ---
>>>>>  fs/btrfs/file.c | 18 +-
>>>>>  1 file changed, 17 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
>>>>> index 2be00e873e92..183e5fb96f42 100644
>>>>> --- a/fs/btrfs/file.c
>>>>> +++ b/fs/btrfs/file.c
>>>>> @@ -1584,6 +1584,7 @@ static noinline ssize_t btrfs_buffered_write(struct 
>>>>> kiocb *iocb,
>>>>>   int ret = 0;
>>>>>   bool only_release_metadata = false;
>>>>>   bool force_page_uptodate = false;
>>>>> + bool quota_enabled = test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
>>>>>  
>>>>>   nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
>>>>>   PAGE_SIZE / (sizeof(struct page *)));
>>>>> @@ -1624,13 +1625,28 @@ static noinline ssize_t 
>>>>> btrfs_buffered_write(struct kiocb *iocb,
>>>>>   fs_info->sectorsize);
>>>>>  
>>>>>   extent_changeset_release(data_reserved);
>>>>> +
>>>>> + /*
>>>>> +  * If we have quota enabled, we must do the heavy lift nocow
>>>>> +  * check here to avoid reserving data space, or we can hit
>>>>> +  * limitation for NOCOW files.
>>>>> +  */
>>>>> + if (quota_enabled) {
>>>>> + if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
>>>>> +   BTRFS_INODE_PREALLOC)) &&
>>>>> + check_can_nocow(BTRFS_I(inode), pos,
>>>>> + &write_bytes) > 0)
>>>>> + goto reserve_meta_only;
>>>>> + }
>>>>>   ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
>>>>> write_bytes);
>>>>>   if (ret < 0) {
>>>>>   if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
>>>>> BTRFS_INODE_PREALLOC)) &&
>>>>>   check_can_nocow(BTRFS_I(inode), pos,
>>>>> - &write_bytes) > 0) {
>>>>> + &write_bytes) > 0 &&
>>>>
>>>>> + !quota_enabled) {
>>>>
>>>> (When we check this condition, quota_enabled must be false or otherwise
>>>> we have already goto reserve_meta_only. So it seems redundant.)
>>>
>>> It's possible that we have quota enabled, and then
>>> btrfs_check_data_free_space() failed with -EDQUOT.
>>>
>>> In that case, we need above !quota_enabled check to avoid unnecessary
>>> check and just go error branch.
>>
>> So should quota_enabled be checked before check_can_nocow()?
> 
> Oh, yes, it should be put before nocow check.
> 
>>
>>>
>>>>
>>>>> +reserve_meta_only:
>>>>>   /*
>>>>>* For nodata cow case, no need to reserve
>>&

Re: [PATCH v2] btrfs: Always check nocow for quota enabled case to make sure we won't reserve unnecessary data space

2018-08-24 Thread Misono Tomohiro
On 2018/08/24 16:20, Qu Wenruo wrote:
> 
> 
> On 2018/8/24 下午3:14, Misono Tomohiro wrote:
>> Hi,
>>
>> On 2018/08/21 14:40, Qu Wenruo wrote:
>>> Commit c6887cd11149 ("Btrfs: don't do nocow check unless we have to")
>>> makes nocow check less frequent to improve performance.
>>>
>>> However for quota enabled case, such optimization could lead to extra
>>> unnecessary data reservation, which results failure for test case like
>>> btrfs/153 in fstests.
>>>
>>> Fix it by reverting to old behavior for quota enabled case.
>>>
>>> Fixes: c6887cd11149 ("Btrfs: don't do nocow check unless we have to")
>>> Signed-off-by: Qu Wenruo 
>>> ---
>>> changelog
>>> v2:
>>>   Fix regression for quota+cow case. (Previously it will skip data
>>>   reservation if quota is enabled, causing regression for limit case.
>>>   Pointed out by Misono)
>>> ---
>>>  fs/btrfs/file.c | 18 +-
>>>  1 file changed, 17 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
>>> index 2be00e873e92..183e5fb96f42 100644
>>> --- a/fs/btrfs/file.c
>>> +++ b/fs/btrfs/file.c
>>> @@ -1584,6 +1584,7 @@ static noinline ssize_t btrfs_buffered_write(struct 
>>> kiocb *iocb,
>>> int ret = 0;
>>> bool only_release_metadata = false;
>>> bool force_page_uptodate = false;
>>> +   bool quota_enabled = test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
>>>  
>>> nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
>>> PAGE_SIZE / (sizeof(struct page *)));
>>> @@ -1624,13 +1625,28 @@ static noinline ssize_t btrfs_buffered_write(struct 
>>> kiocb *iocb,
>>> fs_info->sectorsize);
>>>  
>>> extent_changeset_release(data_reserved);
>>> +
>>> +   /*
>>> +* If we have quota enabled, we must do the heavy lift nocow
>>> +* check here to avoid reserving data space, or we can hit
>>> +* limitation for NOCOW files.
>>> +*/
>>> +   if (quota_enabled) {
>>> +   if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
>>> + BTRFS_INODE_PREALLOC)) &&
>>> +   check_can_nocow(BTRFS_I(inode), pos,
>>> +   &write_bytes) > 0)
>>> +   goto reserve_meta_only;
>>> +   }
>>> ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
>>>   write_bytes);
>>> if (ret < 0) {
>>> if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
>>>   BTRFS_INODE_PREALLOC)) &&
>>> check_can_nocow(BTRFS_I(inode), pos,
>>> -   &write_bytes) > 0) {
>>> +   &write_bytes) > 0 &&
>>
>>> +   !quota_enabled) {
>>
>> (When we check this condition, quota_enabled must be false or otherwise
>> we have already goto reserve_meta_only. So it seems redundant.)
> 
> It's possible that we have quota enabled, and then
> btrfs_check_data_free_space() failed with -EDQUOT.
> 
> In that case, we need above !quota_enabled check to avoid unnecessary
> check and just go error branch.

So should quota_enabled be checked before check_can_nocow()?

> 
>>
>>> +reserve_meta_only:
>>> /*
>>>  * For nodata cow case, no need to reserve
>>>  * data space.
>>>
>>
>> I applied this patch on today's misc-next and it seems mostly ok, but
>> btrfs/022 sometimes gives following warning:
> 
> This looks like related to the regression caused by commit
> c4c129db5da8f070147f175 ("btrfs: drop unused
> parameter qgroup_reserved").
> 
> Would you please try reverting that patch?

I think above commit is fixed by commit eb27db470 ("btrfs: fix
qgroup_free wrong num_bytes in btrfs_subvolume_reserve_metadata") which
is already in misc-next too.

I reverted the above two patches (and one more related patch 6b0cb14901
("btrfs: drop useless member qgroup_reserved of btrfs_pending_snapshot")),
but got the same result.

Thanks,
Miso

Re: [PATCH v2] btrfs: Always check nocow for quota enabled case to make sure we won't reserve unnecessary data space

2018-08-24 Thread Misono Tomohiro
Hi,

On 2018/08/21 14:40, Qu Wenruo wrote:
> Commit c6887cd11149 ("Btrfs: don't do nocow check unless we have to")
> makes nocow check less frequent to improve performance.
> 
> However for quota enabled case, such optimization could lead to extra
> unnecessary data reservation, which results failure for test case like
> btrfs/153 in fstests.
> 
> Fix it by reverting to old behavior for quota enabled case.
> 
> Fixes: c6887cd11149 ("Btrfs: don't do nocow check unless we have to")
> Signed-off-by: Qu Wenruo 
> ---
> changelog
> v2:
>   Fix regression for quota+cow case. (Previously it will skip data
>   reservation if quota is enabled, causing regression for limit case.
>   Pointed out by Misono)
> ---
>  fs/btrfs/file.c | 18 +-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 2be00e873e92..183e5fb96f42 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -1584,6 +1584,7 @@ static noinline ssize_t btrfs_buffered_write(struct 
> kiocb *iocb,
>   int ret = 0;
>   bool only_release_metadata = false;
>   bool force_page_uptodate = false;
> + bool quota_enabled = test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
>  
>   nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
>   PAGE_SIZE / (sizeof(struct page *)));
> @@ -1624,13 +1625,28 @@ static noinline ssize_t btrfs_buffered_write(struct 
> kiocb *iocb,
>   fs_info->sectorsize);
>  
>   extent_changeset_release(data_reserved);
> +
> + /*
> +  * If we have quota enabled, we must do the heavy lift nocow
> +  * check here to avoid reserving data space, or we can hit
> +  * limitation for NOCOW files.
> +  */
> + if (quota_enabled) {
> + if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
> +   BTRFS_INODE_PREALLOC)) &&
> + check_can_nocow(BTRFS_I(inode), pos,
> + &write_bytes) > 0)
> + goto reserve_meta_only;
> + }
>   ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
> write_bytes);
>   if (ret < 0) {
>   if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
> BTRFS_INODE_PREALLOC)) &&
>   check_can_nocow(BTRFS_I(inode), pos,
> - &write_bytes) > 0) {
> + &write_bytes) > 0 &&

> + !quota_enabled) {

(When we check this condition, quota_enabled must be false or otherwise
we have already goto reserve_meta_only. So it seems redundant.)

> +reserve_meta_only:
>   /*
>* For nodata cow case, no need to reserve
>* data space.
> 

I applied this patch on today's misc-next and it seems mostly ok, but
btrfs/022 sometimes gives following warning:

[80244.152130] WARNING: CPU: 5 PID: 14575 at fs/btrfs/extent-tree.c:9742 
btrfs_free_block_groups+0x2d7/0x440 [btrfs]
[80244.152132] Modules linked in: btrfs(O) xor zstd_decompress zstd_compress 
xxhash raid6_pq xt_CHECKSUM ipt_MASQUERADE tun bridge stp llc xt_conntrack 
ip_set nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 
nf_nat nf_conntrack iptable_mangle iptable_raw iptable_security sunrpc 
intel_powerclamp kvm_intel kvm gpio_ich iTCO_wdt ipmi_ssif iTCO_vendor_support 
ipmi_si st irqbypass ipmi_devintf crct10dif_pclmul crc32_pclmul ipmi_msghandler 
ghash_clmulni_intel pcspkr acpi_power_meter i2c_i801 pcc_cpufreq i7core_edac 
lpc_ich acpi_cpufreq xfs libcrc32c mgag200 drm_kms_helper syscopyarea 
sysfillrect sysimgblt fb_sys_fops ttm drm igb sr_mod hwmon uas ptp crc32c_intel 
cdrom usb_storage pps_core ata_generic megaraid_sas dca pata_acpi i2c_algo_bit 
ipv6 [last unloaded: xor]
[80244.152185] CPU: 5 PID: 14575 Comm: umount Tainted: GW IO  
4.18.0-rc8+ #98
[80244.152187] Hardware name: FUJITSU-SV   PRIMERGY RX300 
S6 /D2619, BIOS 6.00 Rev. 1.09.2619.N1   12/13/2010
[80244.152205] RIP: 0010:btrfs_free_block_groups+0x2d7/0x440 [btrfs]
[80244.152206] Code: 85 20 cb 00 00 48 39 c6 0f 84 b9 00 00 00 49 bf 00 01 00 
00 00 00 ad de 48 8b 9d 20 cb 00 00 48 83 7b a0 00 0f 84 0d 01 00 00 <0f> 0b 48 
8d 73 88 31 c9 31 d2 48 89 ef e8 27 7a ff ff 48 89 df e8
[80244.152235] RSP: 0018:8ea10393fdb0 EFLAGS: 00010286
[80244.152237] RAX: 8c1025819e78 RBX: 8c1025819e78 RCX: 
[80244.152238] RDX: 0001 RSI: 8c115329cb20 RDI: 8c1025818e00
[80244.152239] RBP: 8c115329 R08:  R09: 
[80244.152240] R10: 8c1025818e98 R11: 0002 R12: 

[PATCH] btrfs: ctree.h: Fix suspicious rcu usage warning in btrfs_debug_in_rcu()

2018-08-23 Thread Misono Tomohiro
commit 672d599041c8 ("btrfs: Use wrapper macro for rcu string to remove
duplicate code") replaces some open coded rcu string handling with macro.

It turns out that btrfs_debug_in_rcu() is used for the first time and
the macro lacks lock/unlock of rcu string for non debug case
(i.e. when message is not printed), leading to a suspicious RCU usage
warning when CONFIG_PROVE_RCU is on.

Fix this by adding a wrapper to call lock/unlock for non debug case too.

Fixes: 672d599041c8 ("btrfs: Use wrapper macro for rcu string to remove
duplicate code")
Reported-by: David Howells 
Signed-off-by: Misono Tomohiro 
---
 fs/btrfs/ctree.h | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 53af9f5253f4..cc8b4ff8dcea 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3390,9 +3390,9 @@ do {  
\
 #define btrfs_debug(fs_info, fmt, args...) \
btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
 #define btrfs_debug_in_rcu(fs_info, fmt, args...) \
-   btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
+   btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
 #define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
-   btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
+   btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
 #define btrfs_debug_rl(fs_info, fmt, args...) \
btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
 #endif
@@ -3404,6 +3404,13 @@ do { 
\
rcu_read_unlock();  \
 } while (0)
 
+#define btrfs_no_printk_in_rcu(fs_info, fmt, args...)  \
+do {   \
+   rcu_read_lock();\
+   btrfs_no_printk(fs_info, fmt, ##args);  \
+   rcu_read_unlock();  \
+} while (0)
+
 #define btrfs_printk_ratelimited(fs_info, fmt, args...)\
 do {   \
static DEFINE_RATELIMIT_STATE(_rs,  \
-- 
2.14.4



Re: fs/btrfs/volumes.c:6114 suspicious rcu_dereference_check() usage!

2018-08-23 Thread Misono Tomohiro
On 2018/08/24 0:49, David Howells wrote:
> I'm seeing the attached message generated from this line:
> 
>   btrfs_debug_in_rcu(fs_info,
>   "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
>   bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
>   (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid,
>   bio->bi_iter.bi_size);
> 
> in submit_stripe_bio().  I'm not sure exactly why, but I suspect
> rcu_str_deref() is the point from where it is generated.
> 
> Note that the mount succeeds.
> 
> This code was introduced by:
> 
>   commit 672d599041c862dd61a1576c32e946ef0d77aa34
>   Author: Misono Tomohiro 
>   Date:   Thu Aug 2 16:19:07 2018 +0900
> 


Thanks for the report.
I didn't turn on CONFIG_PROVE_RCU and missed the warning.

I will send a fix.

Thanks,
Misono

> David
> ---
> =
> WARNING: suspicious RCU usage
> 4.18.0-fscache+ #540 Not tainted
> -
> fs/btrfs/volumes.c:6114 suspicious rcu_dereference_check() usage!
> 
> other info that might help us debug this:
> 
> 
> rcu_scheduler_active = 2, debug_locks = 1
> 1 lock held by mount/3194:
>  #0: 72604777 (>fs_type->s_umount_key#54/1){+.+.}, at: 
> alloc_super+0xa4/0x313
> 
> stack backtrace:
> CPU: 2 PID: 3194 Comm: mount Not tainted 4.18.0-fscache+ #540
> Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014
> Call Trace:
>  dump_stack+0x67/0x8e
>  btrfs_map_bio+0x282/0x321
>  btree_submit_bio_hook+0x71/0xa6
>  submit_one_bio+0x55/0x66
>  read_extent_buffer_pages+0x1ec/0x2ab
>  btree_read_extent_buffer_pages+0x6e/0x237
>  ? alloc_extent_buffer+0x28f/0x2f2
>  read_tree_block+0x43/0x5e
>  open_ctree+0x139b/0x1ee4
>  btrfs_get_tree+0x357/0xa33
>  ? selinux_fs_context_dup+0x2d/0x104
>  vfs_get_tree+0x7a/0x162
>  btrfs_mount_root+0x52/0x8b
>  btrfs_get_tree+0x4ab/0xa33
>  ? vfs_parse_fs_string+0x5b/0x9e
>  vfs_get_tree+0x7a/0x162
>  do_mount+0x7f0/0x8b2
>  ? memdup_user+0x3e/0x5a
>  ksys_mount+0x72/0x97
>  __x64_sys_mount+0x21/0x24
>  do_syscall_64+0x7d/0x1a0
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x7f11b8365ada
> Code: 48 8b 0d c9 a3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 
> 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 
> 01 c3 48 8b 0d 96 a3 2b 00 f7 d8 64 89 01 48
> RSP: 002b:7ffe0d9a5b98 EFLAGS: 0246 ORIG_RAX: 00a5
> RAX: ffda RBX: 5562bf7702a0 RCX: 7f11b8365ada
> RDX: 5562bf77a620 RSI: 5562bf7704a0 RDI: 5562bf770480
> RBP:  R08:  R09: 7f11b8620c40
> R10: c0ed R11: 0246 R12: 5562bf770480
> R13: 5562bf77a620 R14:  R15: 7f11b90f9184
> 
> 



Re: [PATCH] btrfs-progs: rescue: Add ability to disable quota offline

2018-08-21 Thread Misono Tomohiro
On 2018/08/12 10:33, Qu Wenruo wrote:
> Provide an offline tool to disable quota.
> 
> For kernel which skip_balance doesn't work, there is no way to disable
> quota on huge fs with balance, as quota will cause balance to hang for a
> long long time for each tree block switch.
> 
> So add an offline rescue tool to disable quota.
> 
> Reported-by: Dan Merillat 
> Signed-off-by: Qu Wenruo 
> ---
> This patch can be fetched from github repo:
> https://github.com/adam900710/btrfs-progs/tree/quota_disable
> ---
>  Documentation/btrfs-rescue.asciidoc |  6 +++
>  cmds-rescue.c   | 80 +
>  2 files changed, 86 insertions(+)
> 
> diff --git a/Documentation/btrfs-rescue.asciidoc 
> b/Documentation/btrfs-rescue.asciidoc
> index f94a0ff2b45e..fb088c1a768a 100644
> --- a/Documentation/btrfs-rescue.asciidoc
> +++ b/Documentation/btrfs-rescue.asciidoc
> @@ -31,6 +31,12 @@ help.
>  NOTE: Since *chunk-recover* will scan the whole device, it will be *VERY* 
> slow
>  especially executed on a large device.
>  
> +*disable-quota* ::
> +disable quota offline
> ++
> +Acts as a fallback method to disable quota for case where mount hangs due to
> +balance and quota.
> +
>  *fix-device-size* ::
>  fix device size and super block total bytes values that are do not match
>  +
> diff --git a/cmds-rescue.c b/cmds-rescue.c
> index 38c4ab9b2ef6..c7cd92427e9d 100644
> --- a/cmds-rescue.c
> +++ b/cmds-rescue.c
> @@ -250,6 +250,84 @@ out:
>   return !!ret;
>  }
>  
> +static const char * const cmd_rescue_disable_quota_usage[] = {
> + "btrfs rescue disable-quota ",
> + "Disable quota, especially useful for balance mount hang when quota 
> enabled",
> + "",
> + NULL
> +};
> +
> +static int cmd_rescue_disable_quota(int argc, char **argv)
> +{
> + struct btrfs_trans_handle *trans;
> + struct btrfs_fs_info *fs_info;
> + struct btrfs_path path;
> + struct btrfs_root *root;
> + struct btrfs_qgroup_status_item *qi;
> + struct btrfs_key key;
> + char *devname;
> + int ret;
> +
> + clean_args_no_options(argc, argv, cmd_rescue_disable_quota_usage);
> + if (check_argc_exact(argc, 2))
> + usage(cmd_rescue_disable_quota_usage);
> +
> + devname = argv[optind];
> + ret = check_mounted(devname);
> + if (ret < 0) {
> + error("could not check mount status: %s", strerror(-ret));
> + return !!ret;
> + } else if (ret) {
> + error("%s is currently mounted", devname);
> + return !!ret;
> + }
> + fs_info = open_ctree_fs_info(devname, 0, 0, 0, OPEN_CTREE_WRITES);
> + if (!fs_info) {
> + error("could not open btrfs");
> + ret = -EIO;
> + return !!ret;
> + }
> + root = fs_info->quota_root;
> + if (!root) {
> + printf("Quota is not enabled, no need to modify the fs\n");
> + goto close;
> + }
> + btrfs_init_path(&path);
> + trans = btrfs_start_transaction(root, 1);
> + if (IS_ERR(trans)) {
> + ret = PTR_ERR(trans);
> + error("failed to start transaction: %s", strerror(-ret));
> + goto close;
> + }
> + key.objectid = 0;
> + key.type = BTRFS_QGROUP_STATUS_KEY;
> + key.offset = 0;
> + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
> + if (ret < 0) {
> + error("failed to search tree: %s", strerror(-ret));
> + goto close;
> + }
> + if (ret > 0) {
> + printf(
> + "qgroup status item not found, not need to modify the fs");
> + ret = 0;
> + goto release;
> + }
> + qi = btrfs_item_ptr(path.nodes[0], path.slots[0],
> + struct btrfs_qgroup_status_item);
> + btrfs_set_qgroup_status_flags(path.nodes[0], qi,
> + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT);

This is not what "btrfs quota disable" does (completely removes quota tree),
so I think it is better to have different name other than "disable-quota"
(I'm not sure about good name, maybe suspend?).

btw, maybe we should add this ability to online fs too so that we can
temporarily disable quota during balance while keeping limit/inheritance
settings?

Thanks,
Misono

> + btrfs_mark_buffer_dirty(path.nodes[0]);
> + ret = btrfs_commit_transaction(trans, root);
> + if (ret < 0)
> + error("failed to commit transaction: %s", strerror(-ret));
> +release:
> + btrfs_release_path(&path);
> +close:
> + close_ctree(fs_info->tree_root);
> + return !!ret;
> +}
> +
>  static const char rescue_cmd_group_info[] =
>  "toolbox for specific rescue operations";
>  
> @@ -262,6 +340,8 @@ const struct cmd_group rescue_cmd_group = {
>   { "zero-log", cmd_rescue_zero_log, cmd_rescue_zero_log_usage, 
> NULL, 0},
>   { "fix-device-size", cmd_rescue_fix_device_size,
>   cmd_rescue_fix_device_size_usage, NULL, 0},
> + 

Re: [PATCH v2 00/20] btrfs-progs: Rework of "subvolume list/show" and relax the root privileges of them

2018-08-21 Thread Misono Tomohiro
On 2018/08/16 3:12, David Sterba wrote:
> Hi,
> 
> so I've merged the first batch to devel. There was quite some whitespace
> damage and style issues, but I fixed that as I went through the patches.

Thanks for taking this.

> 
> I've left out all patches that introduce new options, this is the hard
> part and single letter options cannot be added just randomly. That's
> what led to the current state.

Ok, but could you consider adding -f (11th patch, or --follow instead of
short option) which is useful when btrfs is used for root fs
(or please remove cli-tests/010 for the moment from devel branch).

> 
> The semantics of current options is preserved, with the slight exception
> to the option -a.

Devel branch also excludes 13th patch and behavior of -a is not changed,
is it intended? Without the patch, there is no way to get the same result
of current "btrfs subvol list" when mount point is not FS_TREE.

> 
>> * Future todo:
>> If this approach is ok, I'd like to update the output of "sub list" more 
>> like:
>>   - Remove obsolete field (i.e. top-level) from output
> 
> Yeah the top-level field can be removed from the user output. Current
> idea of the fields printed by default is: id, generation, type, path.
> The type is a representation of eg. read-write status,
> subvolume/snapshot, and maybe received, or if it is default.
> 
> The remaining fields are on-demand by other options and the future proof
> solution is to provide the single option to specify which columns to
> print.
> 
> The inline format, ie. the current one, is not very comprehensive so the
> table layout should be the default for user.
> 
> The path format needs to be selected by a more concise way than just -A
> and possibly more added options. Eg. --path
> 
> * absolute (shorthand abs)
> * relative to the current path (pwd)
> * toplevel relative (toplevel)
> 
> In order to make it possible to distinguish in the output without prior
> knowledge of which one was selected, I propose this formatting:
> 
> * absolute path always starts with / and is really absolute to the /
> * relative path to the current directory starts with ./
> * toplevel does not start with ./ nor /
> 
> There are patches touching the libbtrfsutil code, so I'd like to hear
> from Omar what he thinks.

Also, I want to get acknowledgement about 7th patch
(as I wrote in cover letter, I'm ok with original version too).

> 
> What's now in devel should not change and provide base for further
> patches, so please use it if you're going to implement something from
> the above
Understood. I will consider them if the current patches in devel have been settled.

Regards,
Misono



Re: [PATCH] btrfs: Always check nocow for quota enabled case to make sure we won't reserve unnecessary data space

2018-08-20 Thread Misono Tomohiro
On 2018/08/15 15:13, Qu Wenruo wrote:
> Commit c6887cd11149 ("Btrfs: don't do nocow check unless we have to")
> makes nocow check less frequent to improve performance.
> 
> However for quota enabled case, such optimization could lead to extra
> unnecessary data reservation, which results failure for test case like
> btrfs/153 in fstests.
> 
> Fix it by reverting to old behavior for quota enabled case.
> 
> Fixes: c6887cd11149 ("Btrfs: don't do nocow check unless we have to")
> Signed-off-by: Qu Wenruo 
> ---
>  fs/btrfs/file.c | 11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 51e77d72068a..f2ce1d707d4c 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -1587,6 +1587,7 @@ static noinline ssize_t __btrfs_buffered_write(struct 
> file *file,
>   int ret = 0;
>   bool only_release_metadata = false;
>   bool force_page_uptodate = false;
> + bool quota_enabled = test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
>  
>   nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
>   PAGE_SIZE / (sizeof(struct page *)));
> @@ -1627,9 +1628,15 @@ static noinline ssize_t __btrfs_buffered_write(struct 
> file *file,
>   fs_info->sectorsize);
>  
>   extent_changeset_release(data_reserved);
> - ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
> + /*
> +  * If we have quota enabled, we must do the heavy lift nocow
> +  * check here to avoid reserving data space, or we can hit
> +  * limitation for NOCOW files.
> +  */
> + if (!quota_enabled)
> + ret = btrfs_check_data_free_space(inode, 
> &data_reserved, pos,
> write_bytes);
> - if (ret < 0) {
> + if (ret < 0 || quota_enabled) {
>   if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
> BTRFS_INODE_PREALLOC)) &&
>   check_can_nocow(BTRFS_I(inode), pos,
> 

This fixes btrfs/153 but in turn makes other qgroup tests fail (022,091 etc.)

If quota is enabled and the file is not marked as nocow, the above if
condition is false and the write will stop.
So I think we still need to call btrfs_check_data_free_space() when quota
is enabled and the file is not nocow, right?



Re: [PATCH v2 00/20] btrfs-progs: Rework of "subvolume list/show" and relax the root privileges of them

2018-08-09 Thread Misono Tomohiro
On 2018/08/03 22:46, David Sterba wrote:
> On Wed, Jul 04, 2018 at 05:14:59PM +0900, Misono Tomohiro wrote:
>> Gentle ping, as this is related to the new ioctls merged in 4.18-rc1.
> 
> Due to me spending more time than expected on kernel, this patchset will
> be merged partially or the 4.18 will be delayed.

github version is rebased to 4.17.1:
  https://github.com/t-msn/btrfs-progs/tree/rework-sub-list

I will take a vacation from tomorrow and my response may be delayed.

Regards,
Misono

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] btrfs: qgroup: Dirty all qgroups before rescan

2018-08-09 Thread Misono Tomohiro
On 2018/08/09 16:08, Qu Wenruo wrote:
> [BUG]
> In the following case, rescan won't zero out the number of qgroup 1/0:
> --
> $ mkfs.btrfs -fq $DEV
> $ mount $DEV /mnt
> 
> $ btrfs quota enable /mnt
> $ btrfs qgroup create 1/0 /mnt
> $ btrfs sub create /mnt/sub
> $ btrfs qgroup assign 0/257 1/0 /mnt
> 
> $ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000
> $ btrfs sub snap /mnt/sub /mnt/snap
> $ btrfs quota rescan -w /mnt
> $ btrfs qgroup show -pcre /mnt
> qgroupid rfer excl max_rfer max_excl parent  child
>      --  -
> 0/5  16.00KiB 16.00KiB none none --- ---
> 0/257  1016.00KiB 16.00KiB none none 1/0 ---
> 0/258  1016.00KiB 16.00KiB none none --- ---
> 1/01016.00KiB 16.00KiB none none --- 0/257
> 
> so far so good, but:
> 
> $ btrfs qgroup remove 0/257 1/0 /mnt
> WARNING: quotas may be inconsistent, rescan needed
> $ btrfs quota rescan -w /mnt
> $ btrfs qgroup show -pcre  /mnt
> qgoupid rfer excl max_rfer max_excl parent  child
>      --  -
> 0/5  16.00KiB 16.00KiB none none --- ---
> 0/257  1016.00KiB 16.00KiB none none --- ---
> 0/258  1016.00KiB 16.00KiB none none --- ---
> 1/01016.00KiB 16.00KiB none none --- ---
>^^  not cleared
> --
> 
> [CAUSE]
> Before rescan we call qgroup_rescan_zero_tracking() to zero out all
> qgroups' accounting numbers.
> 
> However we don't mark all qgroups dirty, but rely on rescan to mark
> qgroups dirty.
> 
> If we have any high level qgroup but without any child (orphan group), it
> won't be marked dirty during rescan, since we can not reach that qgroup.
> 
> This will cause QGROUP_INFO items of orphan qgroups never get updated in
> quota tree, thus their numbers will stay the same in "btrfs qgroup show"
> output.
> 
> [FIX]
> Just mark all qgroups dirty in qgroup_rescan_zero_tracking(), so even we
> have orphan qgroups their QGROUP_INFO items will still get updated during
> rescan.
> 
> Reported-by: Misono Tomohiro 
> Signed-off-by: Qu Wenruo 
> ---
> changelog:
> v2:
>   Fix some grammar errors in commit message.
> ---
>  fs/btrfs/qgroup.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index 48c1c3e7baf3..5a5372b33d96 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -2864,6 +2864,7 @@ qgroup_rescan_zero_tracking(struct btrfs_fs_info 
> *fs_info)
>   qgroup->rfer_cmpr = 0;
>   qgroup->excl = 0;
>   qgroup->excl_cmpr = 0;
> + qgroup_dirty(fs_info, qgroup);
>   }
>   spin_unlock(&fs_info->qgroup_lock);
>  }
> 
Yes, this and previous patch
  [PATCH] btrfs: qgroup: Don't populating excl numbers for snapshot src if it 
belongs to other qgroups
resolves the problem I see.

So, this will reset all the qgroup items not rescanned in first transaction 
commit
by btrfs_run_qgroups(), but I don't think it is a problem.

Tested-by/Reviewed-by: Misono Tomohiro 

Thanks,
Misono

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5] btrfs: qgroup: Remove qgroup items along with subvolume deletion

2018-08-09 Thread Misono Tomohiro
When qgroup is on, subvolume deletion does not remove qgroup items
of the subvolume (qgroup info, limit, relation) from quota tree and
they need to get removed manually by "btrfs qgroup destroy".

Since level 0 qgroup cannot be used/inherited by any other subvolume,
let's remove them automatically when subvolume is deleted
(to be precise, when the subvolume root is dropped).

Signed-off-by: Misono Tomohiro 
---
v4 -> v5:
  Commit current transaction before calling btrfs_remove_qgroup() to
  keep qgroup consistency in all cases. This resolves the concern in
  v4 patch and there should be no demerit in this patch.

 fs/btrfs/extent-tree.c | 45 +
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e7b237b9547..ed052105e741 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_root_item *root_item = &root->root_item;
struct walk_control *wc;
struct btrfs_key key;
+   u64 objectid = root->root_key.objectid;
int err = 0;
int ret;
int level;
bool root_dropped = false;
 
-   btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
+   btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
 
path = btrfs_alloc_path();
if (!path) {
@@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
goto out_end_trans;
}
 
-   if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+   if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
  NULL, NULL);
if (ret < 0) {
@@ -9043,8 +9044,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 *
 * The most common failure here is just -ENOENT.
 */
-   btrfs_del_orphan_item(trans, tree_root,
- root->root_key.objectid);
+   btrfs_del_orphan_item(trans, tree_root, objectid);
}
}
 
@@ -9056,6 +9056,43 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
btrfs_put_fs_root(root);
}
root_dropped = true;
+
+   if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
+   /*
+* Remove level-0 qgroup items since no other subvolume can
+* use them.
+*
+* First, commit current transaction in order to make sure
+* this subvolume's excl == rfer == 0. Otherwise removing
+* qgroup relation causes qgroup inconsistency if excl != rfer.
+*/
+   ret = btrfs_commit_transaction(trans);
+   if (ret)
+   goto out_free;
+
+   /* Start new transaction and remove qgroup items */
+   trans = btrfs_start_transaction(tree_root, 0);
+   if (IS_ERR(trans)) {
+   err = PTR_ERR(trans);
+   goto out_free;
+   }
+
+   ret = btrfs_remove_qgroup(trans, objectid);
+   if (ret == 1) {
+   /*
+* This means qgroup becomes inconsistent
+* (should not happen since we did transaction commit)
+*/
+   btrfs_warn(fs_info,
+   "qgroup inconsistency found, need qgroup rescan");
+   } else if (ret == -EINVAL || ret == -ENOENT) {
+   /* qgroup is already removed, just ignore this */
+   } else if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   err = ret;
+   }
+   }
+
 out_end_trans:
btrfs_end_transaction_throttle(trans);
 out_free:
-- 
2.14.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4] btrfs: qgroup: Remove qgroup items along with subvolume deletion

2018-08-09 Thread Misono Tomohiro
On 2018/08/09 15:14, Qu Wenruo wrote:
> 
> 
> On 8/9/18 2:05 PM, Misono Tomohiro wrote:
>> On 2018/08/09 14:47, Qu Wenruo wrote:
>>>
>>>
>>> On 8/9/18 12:12 PM, Misono Tomohiro wrote:
>>>> When qgroup is on, subvolume deletion does not remove qgroup items
>>>> of the subvolume (qgroup info, limit, relation) from quota tree and
>>>> they need to get removed manually by "btrfs qgroup destroy".
>>>>
>>>> Since level 0 qgroup cannot be used/inherited by any other subvolume,
>>>> let's remove them automatically when subvolume is deleted
>>>> (to be precise, when the subvolume root is dropped).
>>>>
>>>> Note that qgroup becomes inconsistent in following case:
>>>>   1. qgroup relation exists
>>>>   2. and subvolume's excl != rfer
>>>
>>> That's a little strange.
>>>
>>> If a subvolume is completely dropped, its excl should be the same rfer,
>>> all 0, and removing its relationship should not mark qgroup inconsistent.
>>>
>>> So the problem is the timing when btrfs_remove_qgroup() is called.
>>>
>>> Since qgroup accounting is only called at transaction commit time, and
>>> we're holding a trans handler, it's almost ensured we can't commit this
>>> transaction, thus the number is not updated yet (still not 0)
>>>
>>> So that's why qgroup is inconsistent.
>>>
>>> What about commit current transaction and then call btrfs_remove_qgroup()?
>>>
>>> (Sorry I didn't catch this problem last time I reviewed this patch)
>>
>> Well, I'm a little confused about the flow of transaction commit.
>> btrfs_drop_snapshot() is called from cleaner_kthread;
>> is it OK to commit a transaction in it?
> 
> Not completely clear of the cleaner_kthread(), but from what I see in
> btrfs_drop_snapshot(), btrfs_end_transaction_throttle() itself could
> commit current transaction.
> 
> So in theory we should be OK to finish all the original work of
> btrfs_drop_snapshot(), and then commit current transaction, and finally
> do the qgroup cleanup work.
> 
> But I could totally be wrong, and feel free to point what I'm missing.

Thank you very much for the explanation.
I changed the code to commit the transaction first and it works,
so I hope the next version will solve all the problems.

Thanks,
Misono

> 
> Thanks,
> Qu
> 
>>
>>>
>>> Thanks,
>>> Qu
>>>
>>>> In this case manual qgroup rescan is needed.
>>>>
>>>> Reviewed-by: Lu Fengqi 
>>>> Reviewed-by: Qu Wenruo 
>>>> Signed-off-by: Misono Tomohiro 
>>>> ---
>>>> Hi David,
>>>> It turned out that this patch may cause qgroup inconsistency in case
>>>> described above and need manual rescan. Since current code will keep 
>>>> qgroup items but not break qgroup consistency when deleting subvolume,
>>>> I cannot clearly say which behavior is better for qgroup usability.
>>>> Can I ask your opinion?
>>>>
>>>> v3 -> v4:
>>>>   Check return value of btrfs_remove_qgroup() and if it is 1,
>>>>   print message in syslog that fs needs qgroup rescan
>>>>
>>>>  fs/btrfs/extent-tree.c | 22 ++
>>>>  1 file changed, 18 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>>>> index 9e7b237b9547..828d9e68047d 100644
>>>> --- a/fs/btrfs/extent-tree.c
>>>> +++ b/fs/btrfs/extent-tree.c
>>>> @@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>>>struct btrfs_root_item *root_item = &root->root_item;
>>>>struct walk_control *wc;
>>>>struct btrfs_key key;
>>>> +  u64 objectid = root->root_key.objectid;
>>>>int err = 0;
>>>>int ret;
>>>>int level;
>>>>bool root_dropped = false;
>>>>  
>>>> -  btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
>>>> +  btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
>>>>  
>>>>path = btrfs_alloc_path();
>>>>if (!path) {
>>>> @@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>>>goto out_end_trans;
>>>>}
>>>>  
>>>> -  if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
>>>> +  if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
>>>

Re: [PATCH v4] btrfs: qgroup: Remove qgroup items along with subvolume deletion

2018-08-09 Thread Misono Tomohiro
On 2018/08/09 14:47, Qu Wenruo wrote:
> 
> 
> On 8/9/18 12:12 PM, Misono Tomohiro wrote:
>> When qgroup is on, subvolume deletion does not remove qgroup items
>> of the subvolume (qgroup info, limit, relation) from quota tree and
>> they need to get removed manually by "btrfs qgroup destroy".
>>
>> Since level 0 qgroup cannot be used/inherited by any other subvolume,
>> let's remove them automatically when subvolume is deleted
>> (to be precise, when the subvolume root is dropped).
>>
>> Note that qgroup becomes inconsistent in following case:
>>   1. qgroup relation exists
>>   2. and subvolume's excl != rfer
> 
> That's a little strange.
> 
> If a subvolume is completely dropped, its excl should be the same rfer,
> all 0, and removing its relationship should not mark qgroup inconsistent.
> 
> So the problem is the timing when btrfs_remove_qgroup() is called.
> 
> Since qgroup accounting is only called at transaction commit time, and
> we're holding a trans handler, it's almost ensured we can't commit this
> transaction, thus the number is not updated yet (still not 0)
> 
> So that's why qgroup is inconsistent.
> 
> What about commit current transaction and then call btrfs_remove_qgroup()?
> 
> (Sorry I didn't catch this problem last time I reviewed this patch)

Well, I'm a little confused about the flow of transaction commit.
btrfs_drop_snapshot() is called from cleaner_kthread;
is it OK to commit a transaction in it?

> 
> Thanks,
> Qu
> 
>> In this case manual qgroup rescan is needed.
>>
>> Reviewed-by: Lu Fengqi 
>> Reviewed-by: Qu Wenruo 
>> Signed-off-by: Misono Tomohiro 
>> ---
>> Hi David,
>> It turned out that this patch may cause qgroup inconsistency in case
>> described above and need manual rescan. Since current code will keep 
>> qgroup items but not break qgroup consistency when deleting subvolume,
>> I cannot clearly say which behavior is better for qgroup usability.
>> Can I ask your opinion?
>>
>> v3 -> v4:
>>   Check return value of btrfs_remove_qgroup() and if it is 1,
>>   print message in syslog that fs needs qgroup rescan
>>
>>  fs/btrfs/extent-tree.c | 22 ++
>>  1 file changed, 18 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>> index 9e7b237b9547..828d9e68047d 100644
>> --- a/fs/btrfs/extent-tree.c
>> +++ b/fs/btrfs/extent-tree.c
>> @@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>  struct btrfs_root_item *root_item = &root->root_item;
>>  struct walk_control *wc;
>>  struct btrfs_key key;
>> +u64 objectid = root->root_key.objectid;
>>  int err = 0;
>>  int ret;
>>  int level;
>>  bool root_dropped = false;
>>  
>> -btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
>> +btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
>>  
>>  path = btrfs_alloc_path();
>>  if (!path) {
>> @@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>  goto out_end_trans;
>>  }
>>  
>> -if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
>> +if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
>>  ret = btrfs_find_root(tree_root, &root->root_key, path,
>>NULL, NULL);
>>  if (ret < 0) {
>> @@ -9043,8 +9044,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>   *
>>   * The most common failure here is just -ENOENT.
>>   */
>> -btrfs_del_orphan_item(trans, tree_root,
>> -  root->root_key.objectid);
>> +btrfs_del_orphan_item(trans, tree_root, objectid);
>>  }
>>  }
>>  
>> @@ -9056,6 +9056,20 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>  btrfs_put_fs_root(root);
>>  }
>>  root_dropped = true;
>> +
>> + /* Remove level-0 qgroup items since no other subvolume can use them */
>> +ret = btrfs_remove_qgroup(trans, objectid);
>> +if (ret == 1) {
>> +/* This means qgroup becomes inconsistent by removing items */
>> +btrfs_info(fs_info,
>> +"qgroup inconsistency found, need qgroup rescan");
>> +} else if (ret == -EINVAL || ret == -ENOENT) {
>> +/* qgroup is not enabled or already removed, just ignore this */
>> +} else if (ret) {
>> +btrfs_abort_transaction(trans, ret);
>> +err = ret;
>> +}
>> +
>>  out_end_trans:
>>  btrfs_end_transaction_throttle(trans);
>>  out_free:
>>
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4] btrfs: qgroup: Remove qgroup items along with subvolume deletion

2018-08-08 Thread Misono Tomohiro
When qgroup is on, subvolume deletion does not remove qgroup items
of the subvolume (qgroup info, limit, relation) from quota tree and
they need to get removed manually by "btrfs qgroup destroy".

Since level 0 qgroup cannot be used/inherited by any other subvolume,
let's remove them automatically when subvolume is deleted
(to be precise, when the subvolume root is dropped).

Note that qgroup becomes inconsistent in following case:
  1. qgroup relation exists
  2. and subvolume's excl != rfer
In this case manual qgroup rescan is needed.

Reviewed-by: Lu Fengqi 
Reviewed-by: Qu Wenruo 
Signed-off-by: Misono Tomohiro 
---
Hi David,
It turned out that this patch may cause qgroup inconsistency in case
described above and need manual rescan. Since current code will keep 
qgroup items but not break qgroup consistency when deleting subvolume,
I cannot clearly say which behavior is better for qgroup usability.
Can I ask your opinion?

v3 -> v4:
  Check return value of btrfs_remove_qgroup() and if it is 1,
  print message in syslog that fs needs qgroup rescan

 fs/btrfs/extent-tree.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e7b237b9547..828d9e68047d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_root_item *root_item = &root->root_item;
struct walk_control *wc;
struct btrfs_key key;
+   u64 objectid = root->root_key.objectid;
int err = 0;
int ret;
int level;
bool root_dropped = false;
 
-   btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
+   btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
 
path = btrfs_alloc_path();
if (!path) {
@@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
goto out_end_trans;
}
 
-   if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+   if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
  NULL, NULL);
if (ret < 0) {
@@ -9043,8 +9044,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 *
 * The most common failure here is just -ENOENT.
 */
-   btrfs_del_orphan_item(trans, tree_root,
- root->root_key.objectid);
+   btrfs_del_orphan_item(trans, tree_root, objectid);
}
}
 
@@ -9056,6 +9056,20 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
btrfs_put_fs_root(root);
}
root_dropped = true;
+
+/* Remove level-0 qgroup items since no other subvolume can use them */
+   ret = btrfs_remove_qgroup(trans, objectid);
+   if (ret == 1) {
+   /* This means qgroup becomes inconsistent by removing items */
+   btrfs_info(fs_info,
+   "qgroup inconsistency found, need qgroup rescan");
+   } else if (ret == -EINVAL || ret == -ENOENT) {
+   /* qgroup is not enabled or already removed, just ignore this */
+   } else if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   err = ret;
+   }
+
 out_end_trans:
btrfs_end_transaction_throttle(trans);
 out_free:
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: qgroup: Always remove all qgroup relation in btrfs_remove_qgroup()

2018-08-08 Thread Misono Tomohiro
In btrfs_remove_qgroup(), each qgroup relation is removed by calling
__del_qgroup_relation(). However, __del_qgroup_relation() returns 1
if deletion of qgroup relation causes inconsistency and current code
exits immediately  in that case.

Therefore if there are several qgroup relations and removing first
relation causes inconsistency, remaining items will not be removed.

Fix this by continuing to remove items if return value of
__del_qgroup_relation() is 1.

Signed-off-by: Misono Tomohiro 
---
 fs/btrfs/qgroup.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2ba29f0609d9..f18284253e77 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1428,12 +1428,21 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle 
*trans, u64 qgroupid)
goto out;
 
while (!list_empty(&qgroup->groups)) {
+   int ret2;
+
list = list_first_entry(&qgroup->groups,
struct btrfs_qgroup_list, next_group);
-   ret = __del_qgroup_relation(trans, qgroupid,
+   ret2 = __del_qgroup_relation(trans, qgroupid,
list->group->qgroupid);
-   if (ret)
-   goto out;
+   if (ret2) {
+   ret = ret2;
+   /*
+* __del_qgroup_relation() returns 1 if qgroup becomes
+* inconsistent. Continue to remove items in that case.
+*/
+   if (ret != 1)
+   goto out;
+   }
}
 
spin_lock(&fs_info->qgroup_lock);
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: qgroup: Don't populating excl numbers for snapshot src if it belongs to other qgroups

2018-08-08 Thread Misono Tomohiro
On 2018/08/08 15:04, Qu Wenruo wrote:
> When quota is enabled and we do a snapshot, we just update the 'excl'
> number of both snapshot src and dst to src's 'rfer' - nodesize.
> 
> It's a quick hack to avoid quota rescan every time we create a snapshot
> and it works if src doesn't belong to other qgroups.
> 
> But if we have higher level qgroups, such behavior only works for level
> 0 qgroups, and higher level qgroups don't get update, thus making qgroup
> number inconsistent.
> 
> The problem with updating higher level qgroup numbers is that it's way
> too complex.
> 
> Under the following case, it's pretty simple: (src is 257, dst is 258)
> 0/257 - 1/0, 0/258.
> 
> In this case, we only need to modify 1/0 to reduce its 'excl'
> 
> But under the following case, it will go out of control:
> 
> 0/257 - 1/0, 0/258 - 1/1 (using -i option), 1/0 - 2/0, 1/1 - 2/0.
> 
> So to make it simple, if snapshot src has higher level qgroups, just
> mark qgroup inconsistent and let later rescan to do its job.
> 
> Reported-by: Misono Tomohiro 
> Signed-off-by: Qu Wenruo 
> ---
>  fs/btrfs/qgroup.c | 16 
>  1 file changed, 16 insertions(+)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index ec4351fd7537..2b3d2dd1b735 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -2298,6 +2298,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle 
> *trans,
>   if (!srcgroup)
>   goto unlock;
>  
> + /*
> +  * If snapshot source is belonging to high level qgroups, it
> +  * will be a more complex to hack the numbers.
> +  * E.g. source is 257, snapshot is 258:
> +  * 0/257 - 1/0, creating snapshot 258 will need to update 1/0
> +  * It's too complex when higher level qgroup is involved.
> +  * Mark qgroup inconsistent for later rescan
> +  */
> + if (!list_empty(&srcgroup->groups)) {
> + btrfs_info_rl(fs_info,
> +"src qgroup 0/%llu belongs to higher level qgroup, creating snapshot for it 
> need qgroup rescan",
> +   srcid);
> + fs_info->qgroup_flags |=
> + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> + goto unlock;
> + }
>   /*
>* We call inherit after we clone the root in order to make sure
>* our counts don't go crazy, so at this point the only
> 

Thanks for the quick fix.
Tested-by/Reviewed-by: Misono Tomohiro 

However there is still another problem about removing relation:

(4.18-rc7 with above patch)
$ mkfs.btrfs -fq $DEV
$ mount $DEV /mnt

$ btrfs quota enable /mnt
$ btrfs qgroup create 1/0 /mnt
$ btrfs sub create /mnt/sub
$ btrfs qgroup assign 0/257 1/0 /mnt

$ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000
$ btrfs sub snap /mnt/sub /mnt/snap
$ dmesg | tail -n 1
BTRFS info (device sdb7): src qgroup 0/257 belongs to higher level qgroup,
 creating snapshot for it need qgroup rescan

$ btrfs quota rescan -w /mnt
$ btrfs qgroup show -pcre /mnt
qgroupid         rfer         excl max_rfer max_excl parent child
--------         ----         ---- -------- -------- ------ -----
0/5          16.00KiB     16.00KiB     none     none ---    ---
0/257      1016.00KiB     16.00KiB     none     none 1/0    ---
0/258      1016.00KiB     16.00KiB     none     none ---    ---
1/0        1016.00KiB     16.00KiB     none     none ---    0/257
  
so far so good, but:

$ btrfs qgroup remove 0/257 1/0 /mnt
WARNING: quotas may be inconsistent, rescan needed
$ btrfs quota rescan -w /mnt
$ btrfs qgroup show -pcre  /mnt
qgroupid         rfer         excl max_rfer max_excl parent child
--------         ----         ---- -------- -------- ------ -----
0/5          16.00KiB     16.00KiB     none     none ---    ---
0/257      1016.00KiB     16.00KiB     none     none ---    ---
0/258      1016.00KiB     16.00KiB     none     none ---    ---
1/0        1016.00KiB     16.00KiB     none     none ---    ---
           ^^^^^^^^^^     ^^^^^^^^  not cleared

It seems some fix is needed for rescan too.

Thanks,
Misono

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] btrfs: qgroup: Remove qgroup items along with subvolume deletion

2018-08-07 Thread Misono Tomohiro
On 2018/08/08 1:23, Lu Fengqi wrote:
> On Mon, Aug 06, 2018 at 01:53:28PM +0900, Misono Tomohiro wrote:
>> When qgroup is on, subvolume deletion does not remove qgroup items
>> of the subvolume (qgroup info, limit, relation) from quota tree and
>> they need to get removed manually by "btrfs qgroup destroy".
>>
>> Since level 0 qgroup cannot be used/inherited by any other subvolume,
>> let's remove them automatically when subvolume is deleted
>> (to be precise, when the subvolume root is dropped).
>>
>> Reviewed-by: Lu Fengqi 
>> Reviewed-by: Qu Wenruo 
>> Signed-off-by: Misono Tomohiro 
>> ---
>> v2 -> v3:
>>  Use root->root_key.objectid instead of root->objectid
>>  Add Reviewed-by tag
>>
>> v1 -> v2:
>>  Move call of btrfs_remove_qgroup() from btrfs_delete_subvolume()
>>  to btrfs_snapshot_destroy() so that it will be called after the
>>  subvolume root is really dropped
>>
>> fs/btrfs/extent-tree.c | 16 
>> 1 file changed, 12 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>> index 9e7b237b9547..48edf839ed2c 100644
>> --- a/fs/btrfs/extent-tree.c
>> +++ b/fs/btrfs/extent-tree.c
>> @@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>  struct btrfs_root_item *root_item = &root->root_item;
>>  struct walk_control *wc;
>>  struct btrfs_key key;
>> +u64 objectid = root->root_key.objectid;
>>  int err = 0;
>>  int ret;
>>  int level;
>>  bool root_dropped = false;
>>
>> -btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
>> +btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
>>
>>  path = btrfs_alloc_path();
>>  if (!path) {
>> @@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>  goto out_end_trans;
>>  }
>>
>> -if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
>> +if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
>>  ret = btrfs_find_root(tree_root, &root->root_key, path,
>>NULL, NULL);
>>  if (ret < 0) {
>> @@ -9043,8 +9044,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>   *
>>   * The most common failure here is just -ENOENT.
>>   */
>> -btrfs_del_orphan_item(trans, tree_root,
>> -  root->root_key.objectid);
>> +btrfs_del_orphan_item(trans, tree_root, objectid);
>>  }
>>  }
>>
>> @@ -9056,6 +9056,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>  btrfs_put_fs_root(root);
>>  }
>>  root_dropped = true;
>> +
>> + /* Remove level-0 qgroup items since no other subvolume can use them */
>> +ret = btrfs_remove_qgroup(trans, objectid);
>> +if (ret && ret != -EINVAL && ret != -ENOENT) {
> 
> I'm sorry for missing the snapshot case. If it is a snapshot, then when
> we remove the relevant qgroup, we will not be able to perform
> quick_update_accounting(), and it will return 1. So we shouldn't abort
> the transaction when the return value = 1.
> 
> btrfs_remove_qgroup
> -> __del_qgroup_relation
>-> quick_update_accounting << if qgroup->excl != qgroup->rfer; return 1
> 

oh, thanks for pointing out, I missed that case...
I wonder if there is a way to keep qgroup consistency without enforcing rescan.

btw, I noticed that removing qgroup relation when excl != rfer causes
some problem without this patch:

(4.18-rc7)
$ mkfs.btrfs -fq $DEV
$ mount $DEV /mnt

$ btrfs quota enable /mnt
$ btrfs qgroup create 1/0 /mnt
$ btrfs sub create /mnt/sub
$ btrfs qgroup assign 0/257 1/0 /mnt

$ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000
$ btrfs sub snap /mnt/sub /mnt/snap
$ dd if=/dev/urandom of=/mnt/sub/file2 bs=1k count=1000

$ btrfs fi sync /mnt
$ btrfs qgroup show -pcre /mnt
qgroupid         rfer         excl max_rfer max_excl parent child
--------         ----         ---- -------- -------- ------ -----
0/5          16.00KiB     16.00KiB     none     none ---    ---
0/257         1.97MiB   1016.00KiB     none     none 1/0    ---
0/258      1016.00KiB     16.00KiB     none     none ---    ---
1/0           1.97MiB      1.97MiB     none     none ---    0/257
                           ^^^^^^^ should be 1016KiB

$ btrfs qgroup remove 0/257 1/0 /mnt
WARNING: quotas may be in

Re: [RFC PATCH] btrfs: Remove 'objectid' member from struct btrfs_root

2018-08-06 Thread Misono Tomohiro
On 2018/08/06 15:17, Qu Wenruo wrote:
> 
> 
> On 2018年08月06日 13:25, Misono Tomohiro wrote:
>> There are two members in struct btrfs_root which indicate root's
>> objectid: ->objectid and ->root_key.objectid.
>>
>> They are both set to the same value in __setup_root():
>>   static void __setup_root(struct btrfs_root *root,
>>struct btrfs_fs_info *fs_info,
>>u64 objectid)
>>   {
>> ...
>> root->objectid = objectid;
>> ...
>> root->root_key.objectid = objectid;
>> ...
>>   }
>> and not changed to other value after initialization.
>>
>> grep in btrfs directory shows both are used in many places:
>>   $ grep -rI "root->root_key.objectid" | wc -l
>>   133
>>   $ grep -rI "root->objectid" | wc -l
>>   55
>>  (4.17, inc. some noise)
>>
>> It is confusing to have two similar variable names and it seems
>> that there is no rule about which should be used in a certain case.
>>
>> Since ->root_key itself is needed for tree reloc tree, let's remove
>> 'objectid' member and unify code to use ->root_key.objectid in all places.
> 
> It's a pretty nice move, just a small nitpick about __setup_root()
> inlined later.
> (And a personal crazy idea no need to address)
> 
>>
>> Signed-off-by: Misono Tomohiro 
> 
> Feel free to add my tag:
> Reviewed-by: Qu Wenruo 

Thanks for the review.

> 
>> ---
>> Although being fundamentally independent, this is based on the
>> patch: https://patchwork.kernel.org/patch/10556485/
>> since it also touches root->objectid.
>>
>>  fs/btrfs/backref.c   |  5 +++--
>>  fs/btrfs/btrfs_inode.h   |  8 
>>  fs/btrfs/ctree.c |  2 +-
>>  fs/btrfs/ctree.h |  1 -
>>  fs/btrfs/delayed-inode.c |  5 +++--
>>  fs/btrfs/disk-io.c   |  5 ++---
>>  fs/btrfs/export.c|  4 ++--
>>  fs/btrfs/inode.c |  2 +-
>>  fs/btrfs/ioctl.c |  2 +-
>>  fs/btrfs/qgroup.c| 23 ---
>>  fs/btrfs/ref-verify.c|  8 
>>  fs/btrfs/relocation.c|  3 ++-
>>  fs/btrfs/send.c  | 16 
>>  fs/btrfs/super.c |  6 --
>>  fs/btrfs/transaction.c   |  4 ++--
>>  include/trace/events/btrfs.h | 15 ---
>>  16 files changed, 57 insertions(+), 52 deletions(-)
> 
>> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
>> index 318be7864072..5be7c2bc45c0 100644
>> --- a/fs/btrfs/ctree.h
>> +++ b/fs/btrfs/ctree.h
>> @@ -1202,7 +1202,6 @@ struct btrfs_root {
>>  int last_log_commit;
>>  pid_t log_start_pid;
>>  
>> -u64 objectid;
> 
> Off topic crazy idea here.
> 
> I think it is a little crazy, but it should save a lot of objectid
> related modification:
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 118346aceea9..e6d70f2309a3 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1166,7 +1166,10 @@ struct btrfs_root {
> 
> unsigned long state;
> struct btrfs_root_item root_item;
> -   struct btrfs_key root_key;
> +   union {
> +   struct btrfs_key root_key;
> +   u64 objectid;
> +   };
> struct btrfs_fs_info *fs_info;
> struct extent_io_tree dirty_log_pages;
> 
> @@ -1198,7 +1201,6 @@ struct btrfs_root {
> int last_log_commit;
> pid_t log_start_pid;
> 
> -   u64 objectid;
> u64 last_trans;
> 
> u32 type;
> 
> I'm not sure if this is a really crazy idea or a dirty hack to reduce
> some modification.
> Anyway, I'm completely fine with current patch.
> 
>>  u64 last_trans;
>>  
>>  u32 type;
>> diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
>> index f51b509f2d9b..07187e4ab600 100644
>> --- a/fs/btrfs/delayed-inode.c
>> +++ b/fs/btrfs/delayed-inode.c
>> @@ -1462,7 +1462,7 @@ int btrfs_insert_delayed_dir_index(struct 
>> btrfs_trans_handle *trans,
>>  if (unlikely(ret)) {
>>  btrfs_err(trans->fs_info,
>>"err add delayed dir index item(name: %.*s) into the 
>> insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: 
>> %d)",
>> -  name_len, name, delayed_node->root->objectid,
>> +  name_len, name, delayed_node->root->root_key.objectid,
>>  

[RFC PATCH] btrfs: Remove 'objectid' member from struct btrfs_root

2018-08-05 Thread Misono Tomohiro
There are two members in struct btrfs_root which indicate root's
objectid: ->objectid and ->root_key.objectid.

They are both set to the same value in __setup_root():
  static void __setup_root(struct btrfs_root *root,
   struct btrfs_fs_info *fs_info,
   u64 objectid)
  {
...
root->objectid = objectid;
...
root->root_key.objectid = objectid;
...
  }
and not changed to other value after initialization.

grep in btrfs directory shows both are used in many places:
  $ grep -rI "root->root_key.objectid" | wc -l
  133
  $ grep -rI "root->objectid" | wc -l
  55
 (4.17, inc. some noise)

It is confusing to have two similar variable names and it seems
that there is no rule about which should be used in a certain case.

Since ->root_key itself is needed for tree reloc tree, let's remove
'objectid' member and unify code to use ->root_key.objectid in all places.

Signed-off-by: Misono Tomohiro 
---
Although being fundamentally independent, this is based on the
patch: https://patchwork.kernel.org/patch/10556485/
since it also touches root->objectid.

 fs/btrfs/backref.c   |  5 +++--
 fs/btrfs/btrfs_inode.h   |  8 
 fs/btrfs/ctree.c |  2 +-
 fs/btrfs/ctree.h |  1 -
 fs/btrfs/delayed-inode.c |  5 +++--
 fs/btrfs/disk-io.c   |  5 ++---
 fs/btrfs/export.c|  4 ++--
 fs/btrfs/inode.c |  2 +-
 fs/btrfs/ioctl.c |  2 +-
 fs/btrfs/qgroup.c| 23 ---
 fs/btrfs/ref-verify.c|  8 
 fs/btrfs/relocation.c|  3 ++-
 fs/btrfs/send.c  | 16 
 fs/btrfs/super.c |  6 --
 fs/btrfs/transaction.c   |  4 ++--
 include/trace/events/btrfs.h | 15 ---
 16 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index ae750b1574a2..84006e3dd105 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1468,7 +1468,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, 
u64 bytenr)
struct seq_list elem = SEQ_LIST_INIT(elem);
int ret = 0;
struct share_check shared = {
-   .root_objectid = root->objectid,
+   .root_objectid = root->root_key.objectid,
.inum = inum,
.share_count = 0,
};
@@ -2031,7 +2031,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root 
*fs_root,
/* path must be released before calling iterate()! */
btrfs_debug(fs_root->fs_info,
"following ref at offset %u for inode %llu in 
tree %llu",
-   cur, found_key.objectid, fs_root->objectid);
+   cur, found_key.objectid,
+   fs_root->root_key.objectid);
ret = iterate(parent, name_len,
  (unsigned long)(iref + 1), eb, ctx);
if (ret)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 1343ac57b438..97d91e55b70a 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -206,7 +206,7 @@ static inline struct btrfs_inode *BTRFS_I(const struct 
inode *inode)
 static inline unsigned long btrfs_inode_hash(u64 objectid,
 const struct btrfs_root *root)
 {
-   u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
+   u64 h = objectid ^ (root->root_key.objectid * GOLDEN_RATIO_PRIME);
 
 #if BITS_PER_LONG == 32
h = (h >> 32) ^ (h & 0xffffffff);
@@ -339,15 +339,15 @@ static inline void btrfs_print_data_csum_error(struct 
btrfs_inode *inode,
struct btrfs_root *root = inode->root;
 
/* Output minus objectid, which is more meaningful */
-   if (root->objectid >= BTRFS_LAST_FREE_OBJECTID)
+   if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
btrfs_warn_rl(root->fs_info,
"csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 
0x%08x mirror %d",
-   root->objectid, btrfs_ino(inode),
+   root->root_key.objectid, btrfs_ino(inode),
logical_start, csum, csum_expected, mirror_num);
else
btrfs_warn_rl(root->fs_info,
"csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 
0x%08x mirror %d",
-   root->objectid, btrfs_ino(inode),
+   root->root_key.objectid, btrfs_ino(inode),
logical_start, csum, csum_expected, mirror_num);
 }
 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d436fb4c002e..1f71695cb0a8 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -207,7 +207,7 @@ static void

[PATCH v3] btrfs: qgroup: Remove qgroup items along with subvolume deletion

2018-08-05 Thread Misono Tomohiro
When qgroup is on, subvolume deletion does not remove qgroup items
of the subvolume (qgroup info, limit, relation) from quota tree and
they need to get removed manually by "btrfs qgroup destroy".

Since level 0 qgroup cannot be used/inherited by any other subvolume,
let's remove them automatically when subvolume is deleted
(to be precise, when the subvolume root is dropped).

Reviewed-by: Lu Fengqi 
Reviewed-by: Qu Wenruo 
Signed-off-by: Misono Tomohiro 
---
v2 -> v3:
  Use root->root_key.objectid instead of root->objectid
  Add Reviewed-by tag

v1 -> v2:
  Move call of btrfs_remove_qgroup() from btrfs_delete_subvolume()
  to btrfs_drop_snapshot() so that it will be called after the
  subvolume root is really dropped

 fs/btrfs/extent-tree.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e7b237b9547..48edf839ed2c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_root_item *root_item = &root->root_item;
struct walk_control *wc;
struct btrfs_key key;
+   u64 objectid = root->root_key.objectid;
int err = 0;
int ret;
int level;
bool root_dropped = false;
 
-   btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
+   btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
 
path = btrfs_alloc_path();
if (!path) {
@@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
goto out_end_trans;
}
 
-   if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+   if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
  NULL, NULL);
if (ret < 0) {
@@ -9043,8 +9044,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 *
 * The most common failure here is just -ENOENT.
 */
-   btrfs_del_orphan_item(trans, tree_root,
- root->root_key.objectid);
+   btrfs_del_orphan_item(trans, tree_root, objectid);
}
}
 
@@ -9056,6 +9056,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
btrfs_put_fs_root(root);
}
root_dropped = true;
+
+/* Remove level-0 qgroup items since no other subvolume can use them */
+   ret = btrfs_remove_qgroup(trans, objectid);
+   if (ret && ret != -EINVAL && ret != -ENOENT) {
+   btrfs_abort_transaction(trans, ret);
+   err = ret;
+   }
+
 out_end_trans:
btrfs_end_transaction_throttle(trans);
 out_free:
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] btrfs-progs: ins: logical-resolve: Print message when path cannot be resolved

2018-08-05 Thread Misono Tomohiro
On 2018/08/04 0:06, David Sterba wrote:
> On Wed, Jul 25, 2018 at 05:20:17PM +0900, Misono Tomohiro wrote:
>> Since BTRFS_IOC_INO_PATHS requires fd of subvolume,
> 
> Does it? AFAICS btrfs_ioctl_ino_to_path gets root that's the containing
> subvolume of the path given by the user.

It is the reverse: it returns all paths of the given inode number within the subvolume referred to by the given fd.

> 
>> logical-resolve
>> cannot find the path when mount point is not FS_TREE
>> (because the subvolume path cannot be opened).
> 
> Sorry, I don't understand what's the problem here. What you write sounds
> like there's a reproducer. If yes, please post it.

Sure. If mountpoint is FS_TREE, everything is ok:

$ mkfs.btrfs -fq $DEV
$ mount $DEV /mnt

// create snapshot and hardlink
$ btrfs sub create /mnt/sub
$ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000
$ btrfs sub snap /mnt/sub /mnt/snap
$ ln /mnt/sub/file /mnt/sub/file2
$ tree --inodes /mnt
/mnt
|-- [256]  snap
|   `-- [257]  file
`-- [256]  sub
|-- [257]  file
`-- [257]  file2

inode-resolve returns all paths of given inode number in given subvolume:
(it calls IOC_INO_PATHS)
$ btrfs ins inode-resolve 257 /mnt/sub
/mnt/sub/file
/mnt/sub/file2

$ btrfs ins inode-resolve 257 /mnt/snap
/mnt/snap/file

logical-resolve returns all paths containing given logical address in fs:
(it calls IOC_INO_PATHS after IOC_LOGICAL_INO)
$ filefrag -v /mnt/sub/file
Filesystem type is: 9123683e
File size of /mnt/sub/file is 1024000 (250 blocks of 4096 bytes)
 ext: logical_offset:physical_offset: length:   expected: flags:
   0:0.. 249:   3392..  3641:250: 
last,shared,eof
/mnt/sub/file: 1 extent found
$ btrfs ins logical-resolve $((3392*4096)) /mnt
/mnt/snap/file
/mnt/sub/file
/mnt/sub/file2


However, when mountpoint is not FS_TREE, above logical-resolve fails:

$ umount /mnt
$ mount -o subvol=sub $DEV /mnt
$ btrfs ins logical-resolve $((3392*4096)) /mnt
ERROR: cannot access '/mnt/snap': No such file or directory

The reasons are
(1) btrfs_list_path_for_root() returns paths from FS_TREE
(2) subvolume path cannot be opened and therefore cannot call IOC_INO_PATHS

(1) may be fixed for a mounted subvolume, but if a subvolume is not accessible
at all, there is no way to call IOC_INO_PATHS.
To solve this, we need to be able to pass an arbitrary treeid, just like the
INO_LOOKUP ioctl.

Thanks,
Misono

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] btrfs: qgroup: Remove qgroup item along with subvolume deletion

2018-08-05 Thread Misono Tomohiro
On 2018/08/03 18:16, Lu Fengqi wrote:
> On Fri, Aug 03, 2018 at 11:39:28AM +0300, Nikolay Borisov wrote:
>>
>>
>> On  3.08.2018 11:37, Misono Tomohiro wrote:
>>> On 2018/08/03 16:15, Lu Fengqi wrote:
>>>> On Fri, Aug 03, 2018 at 03:21:12PM +0900, Misono Tomohiro wrote:
>>>>> When qgroup is on, subvolume deletion does not remove qgroup item
>>>>> of the subvolume (qgroup info, limits, relation) from quota tree and
>>>>> they needs to get removed manually by "btrfs qgroup destroy".
>>>>>
>>>>> Since level 0 qgroup cannot be used/inherited by any other subvolume,
>>>>> let's remove them automatically when subvolume is deleted
>>>>> (to be precise, when the subvolume root is dropped).
>>>>>
>>>>> Signed-off-by: Misono Tomohiro 
>>>>
>>>> Looks good to me.
>>>>
>>>> Reviewed-by: Lu Fengqi 
>>>
>>> Thanks for the review.
>>>
>>>>
>>>> There is an off-topic question below.
>>>>
>>>>> ---
>>>>> Note that btrfs/057 fails, but it is the problem of testcase.
>>>>> I will update it too.
>>>>>
>>>>> v1 -> v2:
>>>>>  Move call of btrfs_remove_qgroup() from btrfs_delete_subvolume()
>>>>>  to btrfs_snapshot_destroy() so that it will be called after the
>>>>>  subvolume root is really dropped
>>>>>
>>>>> fs/btrfs/extent-tree.c | 16 
>>>>> 1 file changed, 12 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>>>>> index 9e7b237b9547..b56dea8c8b9f 100644
>>>>> --- a/fs/btrfs/extent-tree.c
>>>>> +++ b/fs/btrfs/extent-tree.c
>>>>> @@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>>>>   struct btrfs_root_item *root_item = >root_item;
>>>>>   struct walk_control *wc;
>>>>>   struct btrfs_key key;
>>>>> + u64 objectid = root->objectid;
>>>>>   int err = 0;
>>>>>   int ret;
>>>>>   int level;
>>>>>   bool root_dropped = false;
>>>>>
>>>>> - btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
>>>>> + btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
>>>>>
>>>>>   path = btrfs_alloc_path();
>>>>>   if (!path) {
>>>>> @@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>>>>   goto out_end_trans;
>>>>>   }
>>>>>
>>>>> - if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
>>>>> + if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
>>>>
>>>> Here use root->objectid instead of root->root_key.objectid. If I recall
>>>> correctly, the root->objectid and root->root_key.objectid are set to the
>>>> identical value. I just wonder if there is any difference between the two
>>>> "objectid"s after the btrfs_root was created?
>>>
>>> in __setup_root(root, fs_info, objectid):
>>> 
>>>   root->objectid = objectid;
>>> 
>>>   root->root_key.objectid = objectid;
>>> 
>>>
>>> and I don't see any update of objectid from "grep -r "root_key.objectid ="",
>>> I think it the same too (and fstests is ok), but any comment from
>>> those who more familiar with code is helpful.
>>
>> Perhaps root->objectid should be removed altogether, if it's a duplicate
>> of root->root_key.objectid
> 
> That's great! I hate these useless redundancies because they always make me
> confused. So Misono could you update this patch to use
> root->root_key.objectid?

Ok. Also I'll try to see if it is possible to remove root->objectid.
Misono

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] btrfs: qgroup: Remove qgroup item along with subvolume deletion

2018-08-03 Thread Misono Tomohiro
On 2018/08/03 16:15, Lu Fengqi wrote:
> On Fri, Aug 03, 2018 at 03:21:12PM +0900, Misono Tomohiro wrote:
>> When qgroup is on, subvolume deletion does not remove qgroup item
>> of the subvolume (qgroup info, limits, relation) from quota tree and
>> they needs to get removed manually by "btrfs qgroup destroy".
>>
>> Since level 0 qgroup cannot be used/inherited by any other subvolume,
>> let's remove them automatically when subvolume is deleted
>> (to be precise, when the subvolume root is dropped).
>>
>> Signed-off-by: Misono Tomohiro 
> 
> Looks good to me.
> 
> Reviewed-by: Lu Fengqi 

Thanks for the review.

> 
> There is an off-topic question below.
> 
>> ---
>> Note that btrfs/057 fails, but it is the problem of testcase.
>> I will update it too.
>>
>> v1 -> v2:
>>  Move call of btrfs_remove_qgroup() from btrfs_delete_subvolume()
>>  to btrfs_snapshot_destroy() so that it will be called after the
>>  subvolume root is really dropped
>>
>> fs/btrfs/extent-tree.c | 16 
>> 1 file changed, 12 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>> index 9e7b237b9547..b56dea8c8b9f 100644
>> --- a/fs/btrfs/extent-tree.c
>> +++ b/fs/btrfs/extent-tree.c
>> @@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>  struct btrfs_root_item *root_item = >root_item;
>>  struct walk_control *wc;
>>  struct btrfs_key key;
>> +u64 objectid = root->objectid;
>>  int err = 0;
>>  int ret;
>>  int level;
>>  bool root_dropped = false;
>>
>> -btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
>> +btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
>>
>>  path = btrfs_alloc_path();
>>  if (!path) {
>> @@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>  goto out_end_trans;
>>  }
>>
>> -if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
>> +if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
> 
> Here use root->objectid instead of root->root_key.objectid. If I recall
> correctly, the root->objectid and root->root_key.objectid are set to the
> identical value. I just wonder if there is any difference between the two
> "objectid"s after the btrfs_root was created?

in __setup_root(root, fs_info, objectid):

  root->objectid = objectid;

  root->root_key.objectid = objectid;


and I don't see any update of objectid from "grep -r "root_key.objectid ="",
so I think they are the same too (and fstests is ok), but any comment from
those who are more familiar with the code would be helpful.

thanks,
Misono

> 
> --
> Thanks,
> Lu
> 
>>  ret = btrfs_find_root(tree_root, >root_key, path,
>>NULL, NULL);
>>  if (ret < 0) {
>> @@ -9043,8 +9044,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>   *
>>   * The most common failure here is just -ENOENT.
>>   */
>> -btrfs_del_orphan_item(trans, tree_root,
>> -  root->root_key.objectid);
>> +btrfs_del_orphan_item(trans, tree_root, objectid);
>>  }
>>  }
>>
>> @@ -9056,6 +9056,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>>  btrfs_put_fs_root(root);
>>  }
>>  root_dropped = true;
>> +
>> + /* Remove level-0 qgroup items since no other subvolume can use them */
>> +ret = btrfs_remove_qgroup(trans, objectid);
>> +if (ret && ret != -EINVAL && ret != -ENOENT) {
>> +btrfs_abort_transaction(trans, ret);
>> +err = ret;
>> +}
>> +
>> out_end_trans:
>>  btrfs_end_transaction_throttle(trans);
>> out_free:
>> -- 
>> 2.14.4
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] fstests: btrfs/057: Update test case to keep subvolume in any kernel version

2018-08-03 Thread Misono Tomohiro
Since commit a79a464d5675 ("btrfs: Allow rmdir(2) to delete an empty
subvolume"), rm -r can delete a subvolume too.

This test assumes that rm -r does not delete a subvolume.
Currently the commit does not affect the test since qgroup items
still exist after subvolume deletion, but we plan to change the
behavior and remove them along with subvolume deletion.

So update the test so that the subvolume (and its qgroup items) is kept
regardless of the kernel version.

Signed-off-by: Misono Tomohiro 
---
 tests/btrfs/057 | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/btrfs/057 b/tests/btrfs/057
index 6743f774..b019f4e1 100755
--- a/tests/btrfs/057
+++ b/tests/btrfs/057
@@ -53,8 +53,9 @@ run_check $FSSTRESS_PROG -d $SCRATCH_MNT/snap1 -w -p 5 -n 
1000 \
 _run_btrfs_util_prog quota enable $SCRATCH_MNT
 _run_btrfs_util_prog quota rescan -w $SCRATCH_MNT
 
-# ignore removing subvolume errors
-rm -rf $SCRATCH_MNT/* >& /dev/null
+# remove all file/dir other than subvolume
+rm -rf $SCRATCH_MNT/snap1/* >& /dev/null
+rm -rf $SCRATCH_MNT/p* >& /dev/null
 
 _run_btrfs_util_prog filesystem sync $SCRATCH_MNT
 units=`_btrfs_qgroup_units`
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] btrfs: qgroup: Remove qgroup item along with subvolume deletion

2018-08-03 Thread Misono Tomohiro
When qgroup is on, subvolume deletion does not remove the qgroup items
of the subvolume (qgroup info, limits, relation) from the quota tree, and
they need to be removed manually by "btrfs qgroup destroy".

Since level 0 qgroup cannot be used/inherited by any other subvolume,
let's remove them automatically when subvolume is deleted
(to be precise, when the subvolume root is dropped).

Signed-off-by: Misono Tomohiro 
---
Note that btrfs/057 fails, but it is the problem of testcase.
I will update it too.

v1 -> v2:
  Move call of btrfs_remove_qgroup() from btrfs_delete_subvolume()
  to btrfs_drop_snapshot() so that it will be called after the
  subvolume root is really dropped

 fs/btrfs/extent-tree.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e7b237b9547..b56dea8c8b9f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8871,12 +8871,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_root_item *root_item = &root->root_item;
struct walk_control *wc;
struct btrfs_key key;
+   u64 objectid = root->objectid;
int err = 0;
int ret;
int level;
bool root_dropped = false;
 
-   btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
+   btrfs_debug(fs_info, "Drop subvolume %llu", objectid);
 
path = btrfs_alloc_path();
if (!path) {
@@ -9030,7 +9031,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
goto out_end_trans;
}
 
-   if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+   if (objectid != BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
  NULL, NULL);
if (ret < 0) {
@@ -9043,8 +9044,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 *
 * The most common failure here is just -ENOENT.
 */
-   btrfs_del_orphan_item(trans, tree_root,
- root->root_key.objectid);
+   btrfs_del_orphan_item(trans, tree_root, objectid);
}
}
 
@@ -9056,6 +9056,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
btrfs_put_fs_root(root);
}
root_dropped = true;
+
+/* Remove level-0 qgroup items since no other subvolume can use them */
+   ret = btrfs_remove_qgroup(trans, objectid);
+   if (ret && ret != -EINVAL && ret != -ENOENT) {
+   btrfs_abort_transaction(trans, ret);
+   err = ret;
+   }
+
 out_end_trans:
btrfs_end_transaction_throttle(trans);
 out_free:
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: qgroup: Remove qgroup item along with subvolume deletion

2018-08-02 Thread Misono Tomohiro
On 2018/08/03 13:23, Lu Fengqi wrote:
> On Fri, Aug 03, 2018 at 12:17:26PM +0800, Qu Wenruo wrote:
>>
>>
>> On 2018年08月03日 12:08, Misono Tomohiro wrote:
>>> When qgroup is on, subvolume deletion does not remove qgroup items
>>> of the subvolume (qgroup info, limits, relation) from quota tree and
>>> they need to get removed manually by "btrfs qgroup destroy".
>>>
>>> Since level 0 qgroup cannot be used/inherited by any other subvolume,
>>> let's remove them automatically when subvolume is deleted.
>>>
>>> Signed-off-by: Misono Tomohiro 
>>> ---
>>>
>>> I don't see any reason to keep these items after subvolume deletion,
>>> but is there something I'm missing?
>>>
>>>  fs/btrfs/inode.c | 4 
>>>  1 file changed, 4 insertions(+)
>>>
>>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
>>> index 3f51ddc18f98..4ec60a1b53a3 100644
>>> --- a/fs/btrfs/inode.c
>>> +++ b/fs/btrfs/inode.c
>>> @@ -4372,6 +4372,10 @@ int btrfs_delete_subvolume(struct inode *dir, struct 
>>> dentry *dentry)
>>> }
>>> }
>>>  
>>> +   ret = btrfs_remove_qgroup(trans, dest->root_key.objectid);
>>
>> According to the caller, it only unlinks the subvolume without really
>> delete the whole subvolume.
>>
>> I'm wondering if we should call btrfs_remove_qgroup() only after we have
>> deleted the whole subvolume, e.g inside btrfs_drop_snapshot().
> 
> I agree with Qu's point, because the ongoing online undelete subvolume will
> be able to recover the subvolume which is intact ondisk, including the qgroup.
> 

Thanks to both of you, I'll update the patch.
Misono

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: qgroup: Remove qgroup item along with subvolume deletion

2018-08-02 Thread Misono Tomohiro
When qgroup is on, subvolume deletion does not remove qgroup items
of the subvolume (qgroup info, limits, relation) from quota tree and
they need to get removed manually by "btrfs qgroup destroy".

Since level 0 qgroup cannot be used/inherited by any other subvolume,
let's remove them automatically when subvolume is deleted.

Signed-off-by: Misono Tomohiro 
---

I don't see any reason to keep these items after subvolume deletion,
but is there something I'm missing?

 fs/btrfs/inode.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3f51ddc18f98..4ec60a1b53a3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4372,6 +4372,10 @@ int btrfs_delete_subvolume(struct inode *dir, struct 
dentry *dentry)
}
}
 
+   ret = btrfs_remove_qgroup(trans, dest->root_key.objectid);
+   if (ret == -EINVAL || ret == -ENOENT)
+   ret = 0;
+
 out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: Use wrapper macro for rcu string to remove duplicate code

2018-08-02 Thread Misono Tomohiro
This is a cleanup patch with no functional changes.

Signed-off-by: Misono Tomohiro 
---
 fs/btrfs/ioctl.c   |  6 ++
 fs/btrfs/scrub.c   |  8 ++--
 fs/btrfs/super.c   |  9 +++--
 fs/btrfs/volumes.c | 21 ++---
 4 files changed, 13 insertions(+), 31 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b077544b5232..2becabe2aaf4 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3165,10 +3165,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info 
*fs_info,
di_args->total_bytes = btrfs_device_get_total_bytes(dev);
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
if (dev->name) {
-   struct rcu_string *name;
-
-   name = rcu_dereference(dev->name);
-   strncpy(di_args->path, name->str, sizeof(di_args->path) - 1);
+   strncpy(di_args->path, rcu_str_deref(dev->name),
+   sizeof(di_args->path) - 1);
di_args->path[sizeof(di_args->path) - 1] = 0;
} else {
di_args->path[0] = '\0';
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6702896cdb8f..5a282268deee 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -4113,7 +4113,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 
devid, u64 start,
struct scrub_ctx *sctx;
int ret;
struct btrfs_device *dev;
-   struct rcu_string *name;
 
if (btrfs_fs_closing(fs_info))
return -EINVAL;
@@ -4167,11 +4166,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 
devid, u64 start,
if (!is_dev_replace && !readonly &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, >dev_state)) {
mutex_unlock(_info->fs_devices->device_list_mutex);
-   rcu_read_lock();
-   name = rcu_dereference(dev->name);
-   btrfs_err(fs_info, "scrub: device %s is not writable",
- name->str);
-   rcu_read_unlock();
+   btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
+ rcu_str_deref(dev->name));
return -EROFS;
}
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 81107ad49f3a..d9f00704f4aa 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2290,7 +2290,6 @@ static int btrfs_show_devname(struct seq_file *m, struct 
dentry *root)
struct btrfs_fs_devices *cur_devices;
struct btrfs_device *dev, *first_dev = NULL;
struct list_head *head;
-   struct rcu_string *name;
 
/*
 * Lightweight locking of the devices. We should not need
@@ -2314,12 +2313,10 @@ static int btrfs_show_devname(struct seq_file *m, 
struct dentry *root)
cur_devices = cur_devices->seed;
}
 
-   if (first_dev) {
-   name = rcu_dereference(first_dev->name);
-   seq_escape(m, name->str, " \t\n\\");
-   } else {
+   if (first_dev)
+   seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
+   else
WARN_ON(1);
-   }
rcu_read_unlock();
return 0;
 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1da162928d1a..d30bbc3cf921 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6187,21 +6187,12 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, 
struct bio *bio,
btrfs_io_bio(bio)->stripe_index = dev_nr;
bio->bi_end_io = btrfs_end_bio;
bio->bi_iter.bi_sector = physical >> 9;
-#ifdef DEBUG
-   {
-   struct rcu_string *name;
-
-   rcu_read_lock();
-   name = rcu_dereference(dev->name);
-   btrfs_debug(fs_info,
-   "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id 
%llu), size=%u",
-   bio_op(bio), bio->bi_opf,
-   (u64)bio->bi_iter.bi_sector,
-   (u_long)dev->bdev->bd_dev, name->str, dev->devid,
-   bio->bi_iter.bi_size);
-   rcu_read_unlock();
-   }
-#endif
+   btrfs_debug_in_rcu(fs_info,
+   "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), 
size=%u",
+   bio_op(bio), bio->bi_opf,
+   (u64)bio->bi_iter.bi_sector,
+   (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid,
+   bio->bi_iter.bi_size);
bio_set_dev(bio, dev->bdev);
 
btrfs_bio_counter_inc_noblocked(fs_info);
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: replace: Reset on-disk dev stats value after replace

2018-07-31 Thread Misono Tomohiro
The on-disk dev stats values are updated in btrfs_run_dev_stats(),
which is called during transaction commit, if device->dev_stats_ccnt
is not zero.

Since the current replace operation does not touch dev_stats_ccnt, the
on-disk dev stats values are not updated. Therefore "btrfs device stats"
may return the old device's values after umount/mount
(Example: See "btrfs ins dump-t -t DEV $DEV" after btrfs/100 finishes).

Fix this by incrementing dev_stats_ccnt in
btrfs_dev_replace_finishing() when the replace succeeds.

Signed-off-by: Misono Tomohiro 
---
 fs/btrfs/dev-replace.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e2ba0419297a..d20b244623f2 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -676,6 +676,12 @@ static int btrfs_dev_replace_finishing(struct 
btrfs_fs_info *fs_info,
 
btrfs_rm_dev_replace_unblocked(fs_info);
 
+   /*
+* Increment dev_stats_ccnt so that btrfs_run_dev_stats() will
+* update on-disk dev stats value during commit transaction
+*/
+   atomic_inc(&tgt_device->dev_stats_ccnt);
+
/*
 * this is again a consistent state where no dev_replace procedure
 * is running, the target device is part of the filesystem, the
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] btrfs-progs: completion: Small fixes to make debug simpler

2018-07-30 Thread Misono Tomohiro
This is useful.

Reviewed-by: Misono Tomohiro 

On 2018/07/17 15:15, Qu Wenruo wrote:
> For developer, it's pretty common to use "btrfs check" or "btrfs ins
> dump-tree" on raw dumps.
> 
> However "btrfs check" can only complete real block devices, and
> "btrfs inspect dump-tree" can only complete dir.
> 
> Make them to use _filedir() so any filename can be completed and save us
> developer a little time and nerve hitting that holy tab.
> 
> Qu Wenruo (2):
>   btrfs-progs: completion: Use _filedir() to replace _btrfs_devs()
>   btrfs-progs: completion: Let dump-tree/dump-super/inode-resolve to
> accept any file
> 
>  btrfs-completion | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 11/15] btrfs-progs: Add delayed refs infrastructure

2018-07-30 Thread Misono Tomohiro
On 2018/06/08 21:47, Nikolay Borisov wrote:
> This commit pulls those portions of the kernel implementation of
> delayed refs which are necessary to have them working in user-space.
> I've done the following modifications:
> 
> 1. Replaced all kmem_cache_alloc calls to kmalloc.
> 
> 2. Removed all locking-related code, since we are single threaded in
> userspace.
> 
> 3. Removed code which deals with data refs - delayed refs in user space
> are going to be used only for cowonly trees.
> 
> Signed-off-by: Nikolay Borisov 
> ---
>  Makefile  |   3 +-
>  ctree.h   |   3 +
>  delayed-ref.c | 608 
> ++
>  delayed-ref.h | 225 ++
>  extent-tree.c | 228 ++
>  kerncompat.h  |   8 +
>  transaction.h |   4 +
>  7 files changed, 1078 insertions(+), 1 deletion(-)
>  create mode 100644 delayed-ref.c
>  create mode 100644 delayed-ref.h
> 
> diff --git a/Makefile b/Makefile
> index 544410e6440c..9508ad4f11e6 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -116,7 +116,8 @@ objects = ctree.o disk-io.o kernel-lib/radix-tree.o 
> extent-tree.o print-tree.o \
> qgroup.o free-space-cache.o kernel-lib/list_sort.o props.o \
> kernel-shared/ulist.o qgroup-verify.o backref.o string-table.o 
> task-utils.o \
> inode.o file.o find-root.o free-space-tree.o help.o send-dump.o \
> -   fsfeatures.o kernel-lib/tables.o kernel-lib/raid56.o transaction.o
> +   fsfeatures.o kernel-lib/tables.o kernel-lib/raid56.o transaction.o \
> +   delayed-ref.o
>  cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o 
> \
>  cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \
>  cmds-quota.o cmds-qgroup.o cmds-replace.o check/main.o \
> diff --git a/ctree.h b/ctree.h
> index b30a946658ce..d1ea45571d1e 100644
> --- a/ctree.h
> +++ b/ctree.h
> @@ -2812,4 +2812,7 @@ int btrfs_punch_hole(struct btrfs_trans_handle *trans,
>  int btrfs_read_file(struct btrfs_root *root, u64 ino, u64 start, int len,
>   char *dest);
>  
> +
> +/* extent-tree.c */
> +int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long 
> nr);
>  #endif
> diff --git a/delayed-ref.c b/delayed-ref.c
> new file mode 100644
> index ..f3fa50239380
> --- /dev/null
> +++ b/delayed-ref.c
> @@ -0,0 +1,608 @@



> +
> +static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
> + struct btrfs_delayed_ref_root *delayed_refs,
> + struct btrfs_delayed_ref_head *head,
> + struct btrfs_delayed_ref_node *ref)
> +{
> + rb_erase(>ref_node, >ref_tree);
> + RB_CLEAR_NODE(>ref_node);
> + if (!list_empty(>add_list))
> + list_del(>add_list);
> + ref->in_tree = 0;
> + btrfs_put_delayed_ref(ref);

Compared with kernel code, it seems that we need

delayed_refs->num_entries--;

> + if (trans->delayed_ref_updates)
> + trans->delayed_ref_updates--;
> +}

> +static noinline struct btrfs_delayed_ref_head *
> +add_delayed_ref_head(struct btrfs_trans_handle *trans,
> +  struct btrfs_delayed_ref_head *head_ref,
> +  void *qrecord,
> +  int action, int *qrecord_inserted_ret,
> +  int *old_ref_mod, int *new_ref_mod)
> +{
> + struct btrfs_delayed_ref_head *existing;
> + struct btrfs_delayed_ref_root *delayed_refs;
> +
> + delayed_refs = >delayed_refs;
> +
> + existing = htree_insert(_refs->href_root, _ref->href_node);
> + if (existing) {
> + update_existing_head_ref(delayed_refs, existing, head_ref, 
> old_ref_mod);
> + /*
> +  * we've updated the existing ref, free the newly
> +  * allocated ref
> +  */
> + kfree(head_ref);
> + head_ref = existing;
> + } else {
> + if (old_ref_mod)
> + *old_ref_mod = 0;
> + delayed_refs->num_heads++;
> + delayed_refs->num_heads_ready++;

And
delayed_refs->num_entries++;

to correctly count num_entries.
(I noticed that num_entries became negative while I was running gdb.)

However, num_entries is actually not used in progs at all (it is used for
throttling in kernel), so maybe we can just drop the variable from progs?

> + trans->delayed_ref_updates++;
> + }
> + if (new_ref_mod)
> + *new_ref_mod = head_ref->total_ref_mod;
> +
> + return head_ref;
> +}



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 14/15] btrfs-progs: Wire up delayed refs

2018-07-30 Thread Misono Tomohiro
On 2018/06/08 21:47, Nikolay Borisov wrote:
> This commit enables the delayed refs infrastructures. This entails doing
> the following:
> 
> 1. Replacing existing calls of btrfs_extent_post_op (which is the
> equivalent of delayed refs) with the proper btrfs_run_delayed_refs.
> As well as eliminating open-coded calls to finish_current_insert and
> del_pending_extents which execute the delayed ops.
> 
> 2. Wiring up the addition of delayed refs when freeing extents
> (btrfs_free_extent) and when adding new extents (alloc_tree_block).
> 
> 3. Adding calls to btrfs_run_delayed refs in the transaction commit
> path alongside comments why every call is needed, since it's not always
> obvious (those call sites were derived empirically by running and
> debugging existing tests)
> 
> 4. Correctly flagging the transaction in which we are reinitialising
> the extent tree.
> 
> Signed-off-by: Nikolay Borisov 
> ---
>  check/main.c  |   3 +-
>  extent-tree.c | 166 
> ++
>  transaction.c |  24 +
>  3 files changed, 111 insertions(+), 82 deletions(-)
> 
> diff --git a/check/main.c b/check/main.c
> index b84903acdb25..7c9689f29fd3 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -8634,7 +8634,7 @@ static int reinit_extent_tree(struct btrfs_trans_handle 
> *trans,
>   fprintf(stderr, "Error adding block group\n");
>   return ret;
>   }
> - btrfs_extent_post_op(trans);
> + btrfs_run_delayed_refs(trans, -1);
>   }
>  
>   ret = reset_balance(trans, fs_info);
> @@ -9682,6 +9682,7 @@ int cmd_check(int argc, char **argv)
>   goto close_out;
>   }
>  
> + trans->reinit_extent_tree = true;
>   if (init_extent_tree) {
>   printf("Creating a new extent tree\n");
>   ret = reinit_extent_tree(trans, info,
> diff --git a/extent-tree.c b/extent-tree.c
> index 3208ed11cb91..9d085158f2d8 100644
> --- a/extent-tree.c
> +++ b/extent-tree.c
> @@ -1418,8 +1418,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle 
> *trans,
>   err = ret;
>  out:
>   btrfs_free_path(path);
> - finish_current_insert(trans);
> - del_pending_extents(trans);
>   BUG_ON(err);
>   return err;
>  }
> @@ -1602,8 +1600,6 @@ int btrfs_set_block_flags(struct btrfs_trans_handle 
> *trans, u64 bytenr,
>   btrfs_set_extent_flags(l, item, flags);
>  out:
>   btrfs_free_path(path);
> - finish_current_insert(trans);
> - del_pending_extents(trans);
>   return ret;
>  }
>  
> @@ -1701,7 +1697,6 @@ static int write_one_cache_group(struct 
> btrfs_trans_handle *trans,
>struct btrfs_block_group_cache *cache)
>  {
>   int ret;
> - int pending_ret;
>   struct btrfs_root *extent_root = trans->fs_info->extent_root;
>   unsigned long bi;
>   struct extent_buffer *leaf;
> @@ -1717,12 +1712,8 @@ static int write_one_cache_group(struct 
> btrfs_trans_handle *trans,
>   btrfs_mark_buffer_dirty(leaf);
>   btrfs_release_path(path);
>  fail:
> - finish_current_insert(trans);
> - pending_ret = del_pending_extents(trans);
>   if (ret)
>   return ret;
> - if (pending_ret)
> - return pending_ret;
>   return 0;
>  
>  }
> @@ -2050,6 +2041,7 @@ static int finish_current_insert(struct 
> btrfs_trans_handle *trans)
>   int skinny_metadata =
>   btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);
>  
> +
>   while(1) {
>   ret = find_first_extent_bit(>extent_ins, 0, ,
>   , EXTENT_LOCKED);
> @@ -2081,6 +2073,8 @@ static int finish_current_insert(struct 
> btrfs_trans_handle *trans)
>   BUG_ON(1);
>   }
>  
> +
> + printf("shouldn't be executed\n");
>   clear_extent_bits(>extent_ins, start, end, EXTENT_LOCKED);
>   kfree(extent_op);
>   }
> @@ -2380,7 +2374,6 @@ static int __free_extent(struct btrfs_trans_handle 
> *trans,
>   }
>  fail:
>   btrfs_free_path(path);
> - finish_current_insert(trans);
>   return ret;
>  }
>  
> @@ -2463,33 +2456,30 @@ int btrfs_free_extent(struct btrfs_trans_handle 
> *trans,
> u64 bytenr, u64 num_bytes, u64 parent,
> u64 root_objectid, u64 owner, u64 offset)
>  {
> - struct btrfs_root *extent_root = root->fs_info->extent_root;
> - int pending_ret;
>   int ret;
>  
>   WARN_ON(num_bytes < root->fs_info->sectorsize);
> - if (root == extent_root) {
> - struct pending_extent_op *extent_op;
> -
> - extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
> - BUG_ON(!extent_op);
> -
> - extent_op->type = PENDING_EXTENT_DELETE;
> - extent_op->bytenr = bytenr;
> - extent_op->num_bytes = 

Re: [PATCH] btrfs: qgroup: Init flags with RESCAN bit at quota enable time

2018-07-27 Thread Misono Tomohiro
On 2018/07/27 15:09, Qu Wenruo wrote:
> 
> 
> On 2018年07月27日 09:43, Misono Tomohiro wrote:
>> On 2018/07/27 10:19, Qu Wenruo wrote:
>>>
>>>
>>> On 2018年07月27日 09:10, Misono Tomohiro wrote:
>>>> On 2018/07/26 18:15, Qu Wenruo wrote:
>>>>> Between btrfs_quota_enable() finished and rescan kicked in, there is a
>>>>> small window that quota status has (ON | INCONSISTENT) bits set but
>>>>> without RESCAN bits set.
>>>>>
>>>>> And transaction is committed inside the window and then power loss
>>>>> happens, we will have a quota tree with all qgroup numbers set to 0, and
>>>>> not RESCAN bit set.
>>>>>
>>>>> At next mount time, qgroup rescan will not kick in due to the missing of
>>>>> RESCAN bit, user needs to kick in rescan manually.
>>>>>
>>>>> This patch will fix it by setting RESCAN bit at btrfs_quota_enable(),
>>>>> so even after power loss we will still kick in rescan automatically.
>>>>>
>>>>> Suggested-by: Misono Tomohiro 
>>>>> Signed-off-by: Qu Wenruo 
>>>>> ---
>>>>>  fs/btrfs/qgroup.c | 5 +++--
>>>>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
>>>>> index c25dc47210a3..13c1c7dd278d 100644
>>>>> --- a/fs/btrfs/qgroup.c
>>>>> +++ b/fs/btrfs/qgroup.c
>>>>> @@ -930,7 +930,8 @@ int btrfs_quota_enable(struct btrfs_trans_handle 
>>>>> *trans,
>>>>>   btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
>>>>>   btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
>>>>>   fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
>>>>> - BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
>>>>> + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT |
>>>>> + BTRFS_QGROUP_STATUS_FLAG_RESCAN;
>>>>>   btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
>>>>>   btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
>>>>>  
>>>>> @@ -987,7 +988,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle 
>>>>> *trans,
>>>>>   fs_info->quota_root = quota_root;
>>>>>   set_bit(BTRFS_FS_QUOTA_ENABLED, _info->flags);
>>>>>   spin_unlock(_info->qgroup_lock);
>>>>> - ret = qgroup_rescan_init(fs_info, 0, 1);
>>>>> + ret = qgroup_rescan_init(fs_info, 0, 0);
>>>>>   if (!ret) {
>>>>>   qgroup_rescan_zero_tracking(fs_info);
>>>>>   btrfs_queue_work(fs_info->qgroup_rescan_workers,
>>>>>
>>>>
>>>> This is what I think at first, but is it ok not holding 
>>>> fs_info->qgroup_ioctl_lock
>>>> in brfs_qgroup_rescan() as you concerned in previous thread?
>>>
>>> I think it's OK, since we have larger mutex (subvol_sem) for
>>> quota_enable/disable() so there will be no concurrency modifying flags.
>>> And we're holding trans handler from btrfs_ioctl_quota_ctl(),
>>> transaction won't be committed in btrfs_quota_enable().
>>
>> Ok, but nikolay's patch in misc-next moves transaction commit in 
>> btrfs_quota_enable():
>>   https://patchwork.kernel.org/patch/10508819/
>>   ("btrfs: qgroups: Move transaction management inside 
>> btrfs_quota_enable/disable")
> 
> Since qgroup_rescan_init() has nothing do to with transaction, it looks
> OK even with Nikolay's patch.
> 

Understood. Thanks for your explanation.
Misono

>>
>> This is related to https://marc.info/?l=linux-btrfs=152999289017582.
>> However, it seems that other people does not see the problem,
>> so I'm not sure how the above patch ends up...
> 
> IIRC I also failed to reproduce it, thus can't provide much help for
> that thread.
> 
> Thanks,
> Qu
> 
> 
> 
>>
>> Thanks,
>> Tomohiro Misono
>>
>>>
>>> So I think it's OK.
>>>
>>> Thanks,
>>> Qu
>>>
>>>
>>
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: qgroup: Init flags with RESCAN bit at quota enable time

2018-07-26 Thread Misono Tomohiro
On 2018/07/27 10:19, Qu Wenruo wrote:
> 
> 
> On 2018年07月27日 09:10, Misono Tomohiro wrote:
>> On 2018/07/26 18:15, Qu Wenruo wrote:
>>> Between btrfs_quota_enable() finished and rescan kicked in, there is a
>>> small window that quota status has (ON | INCONSISTENT) bits set but
>>> without RESCAN bits set.
>>>
>>> And transaction is committed inside the window and then power loss
>>> happens, we will have a quota tree with all qgroup numbers set to 0, and
>>> not RESCAN bit set.
>>>
>>> At next mount time, qgroup rescan will not kick in due to the missing of
>>> RESCAN bit, user needs to kick in rescan manually.
>>>
>>> This patch will fix it by setting RESCAN bit at btrfs_quota_enable(),
>>> so even after power loss we will still kick in rescan automatically.
>>>
>>> Suggested-by: Misono Tomohiro 
>>> Signed-off-by: Qu Wenruo 
>>> ---
>>>  fs/btrfs/qgroup.c | 5 +++--
>>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
>>> index c25dc47210a3..13c1c7dd278d 100644
>>> --- a/fs/btrfs/qgroup.c
>>> +++ b/fs/btrfs/qgroup.c
>>> @@ -930,7 +930,8 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>>> btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
>>> btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
>>> fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
>>> -   BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
>>> +   BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT |
>>> +   BTRFS_QGROUP_STATUS_FLAG_RESCAN;
>>> btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
>>> btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
>>>  
>>> @@ -987,7 +988,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>>> fs_info->quota_root = quota_root;
>>> set_bit(BTRFS_FS_QUOTA_ENABLED, _info->flags);
>>> spin_unlock(_info->qgroup_lock);
>>> -   ret = qgroup_rescan_init(fs_info, 0, 1);
>>> +   ret = qgroup_rescan_init(fs_info, 0, 0);
>>> if (!ret) {
>>> qgroup_rescan_zero_tracking(fs_info);
>>> btrfs_queue_work(fs_info->qgroup_rescan_workers,
>>>
>>
>> This is what I think at first, but is it ok not holding 
>> fs_info->qgroup_ioctl_lock
>> in brfs_qgroup_rescan() as you concerned in previous thread?
> 
> I think it's OK, since we have larger mutex (subvol_sem) for
> quota_enable/disable() so there will be no concurrency modifying flags.
> And we're holding trans handler from btrfs_ioctl_quota_ctl(),
> transaction won't be committed in btrfs_quota_enable().

Ok, but nikolay's patch in misc-next moves transaction commit in 
btrfs_quota_enable():
  https://patchwork.kernel.org/patch/10508819/
  ("btrfs: qgroups: Move transaction management inside 
btrfs_quota_enable/disable")

This is related to https://marc.info/?l=linux-btrfs&m=152999289017582.
However, it seems that other people do not see the problem,
so I'm not sure how the above patch will end up...

Thanks,
Tomohiro Misono

> 
> So I think it's OK.
> 
> Thanks,
> Qu
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: qgroup: Init flags with RESCAN bit at quota enable time

2018-07-26 Thread Misono Tomohiro
On 2018/07/26 18:15, Qu Wenruo wrote:
> Between btrfs_quota_enable() finished and rescan kicked in, there is a
> small window that quota status has (ON | INCONSISTENT) bits set but
> without RESCAN bits set.
> 
> And transaction is committed inside the window and then power loss
> happens, we will have a quota tree with all qgroup numbers set to 0, and
> not RESCAN bit set.
> 
> At next mount time, qgroup rescan will not kick in due to the missing of
> RESCAN bit, user needs to kick in rescan manually.
> 
> This patch will fix it by setting RESCAN bit at btrfs_quota_enable(),
> so even after power loss we will still kick in rescan automatically.
> 
> Suggested-by: Misono Tomohiro 
> Signed-off-by: Qu Wenruo 
> ---
>  fs/btrfs/qgroup.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index c25dc47210a3..13c1c7dd278d 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -930,7 +930,8 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>   btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
>   btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
>   fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
> - BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT |
> + BTRFS_QGROUP_STATUS_FLAG_RESCAN;
>   btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
>   btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
>  
> @@ -987,7 +988,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>   fs_info->quota_root = quota_root;
>   set_bit(BTRFS_FS_QUOTA_ENABLED, _info->flags);
>   spin_unlock(_info->qgroup_lock);
> - ret = qgroup_rescan_init(fs_info, 0, 1);
> + ret = qgroup_rescan_init(fs_info, 0, 0);
>   if (!ret) {
>   qgroup_rescan_zero_tracking(fs_info);
>   btrfs_queue_work(fs_info->qgroup_rescan_workers,
> 

This is what I thought at first, but is it OK not to hold
fs_info->qgroup_ioctl_lock in btrfs_qgroup_rescan(), as you were
concerned about in the previous thread?

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: qgroup: Auto kick in rescan if qgroup tree is initialized without rescan initialized

2018-07-26 Thread Misono Tomohiro
On 2018/07/26 15:59, Qu Wenruo wrote:
> Under certain case, btrfs/166 could cause power loss just after quota
> tree initialized but rescan not kicked in.
> 
> In this case, since flags of qgroup status item is just ON |
> INCONSISTENT, without RESCAN flag, rescan won't be kicked in in next
> mount.
> 
> Now kick in rescan automatically for such situation, so user won't need
> to do rescan manually.

How about setting all of BTRFS_QGROUP_STATUS_FLAG_ON/INCONSISTENT/RESCAN
in the first place and calling qgroup_rescan_init(fs_info, 0, 0)
(currently it is (fs_info, 0, 1)) in btrfs_quota_enable()?

I think with this approach, current btrfs-progs can work as expected too.

> 
> Signed-off-by: Qu Wenruo 
> ---
>  fs/btrfs/qgroup.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index c25dc47210a3..e62598fc354f 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -392,6 +392,17 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info 
> *fs_info)
>   fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
> ptr);
>   rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
> +
> + /*
> +  * Qgroup is enabled but rescan hans't kicked in and
> +  * power loss happened, kick rescan in
> +  */
> + if (rescan_progress == 0 &&
> + (BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT |
> +  BTRFS_QGROUP_STATUS_FLAG_ON) ==
> + fs_info->qgroup_flags)
> + fs_info->qgroup_flags |=
> + BTRFS_QGROUP_STATUS_FLAG_RESCAN;
>   goto next1;
>   }
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: extent-tree.c: Remove unused __btrfs_free_block_rsv()

2018-07-25 Thread Misono Tomohiro
There is no user of this function.

Its removal was forgotten in commit a575ceeb1338
("Btrfs: get rid of unused orphan infrastructure").

Signed-off-by: Misono Tomohiro 
---
 fs/btrfs/ctree.h   | 1 -
 fs/btrfs/extent-tree.c | 5 -
 2 files changed, 6 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9c638931b75e..6506a3e8ccbd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2764,7 +2764,6 @@ void btrfs_init_metadata_block_rsv(struct btrfs_fs_info 
*fs_info,
   unsigned short type);
 void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
  struct btrfs_block_rsv *rsv);
-void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
 int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 16b916a33e56..240535eec2f4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5529,11 +5529,6 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
kfree(rsv);
 }
 
-void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
-{
-   kfree(rsv);
-}
-
 int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush)
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: backref.c: Use ERR_CAST() to return error code

2018-07-25 Thread Misono Tomohiro
Use ERR_CAST() instead of a (void *) cast to make the meaning clear.

Signed-off-by: Misono Tomohiro 
---
 fs/btrfs/backref.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7ce0b5f9e99e..9f231bbccb1e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -2231,7 +2231,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct 
btrfs_root *fs_root,
 
fspath = init_data_container(total_bytes);
if (IS_ERR(fspath))
-   return (void *)fspath;
+   return ERR_CAST(fspath);
 
ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
if (!ifp) {
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: backref: Do not return duplicate refs from find_parent_nodes()

2018-07-25 Thread Misono Tomohiro
In some cases, "btrfs inspect-internal logical-resolve" returns
duplicate entries for a given logical address:

[Example]
=
// create a file sharing exactly the same extent
$ dd if=/dev/urandom of=/mnt/file bs=4k count=1
$ sync
$ cloner -s 0 -d 4096 -l 4096 /mnt/file /mnt/file // cloner from xfstest
$ sync

$ filefrag -v /mnt/file
Filesystem type is: 9123683e
File size of /mnt/file is 8192 (2 blocks of 4096 bytes)
 ext: logical_offset:physical_offset: length:   expected: flags:
   0:0..   0:   3410..  3410:  1: shared
   1:1..   1:   3410..  3410:  1:   3411: 
last,shared,eof
/mnt/file 2 extents found

// there are two backrefs
$ btrfs inspect-internal dump-tree $DEV | grep -A 3 "$((3410*4096)) EXTENT_ITEM"
  item 12 key (13967360 EXTENT_ITEM 4096) itemoff 15449 itemsize 82
 refs 2 gen 250 flags DATA
 extent data backref root FS_TREE objectid 268 offset 4096 count 1
 extent data backref root FS_TREE objectid 268 offset 0 count 1

$ btrfs inspect-internal logical-resolve -P $((3410*4096)) /mnt
inode 268 offset 4096 root 5
inode 268 offset 0 root 5
inode 268 offset 4096 root 5 // Duplicate

(Or, see 004.full of xfstest for more complex case.)
=

This problem is related to resolving indirect ref.
Related call stack is as below:

btrfs_ioctl_logical_to_ino()
  - iterate_extent_inodes_from_logical()
- extent_from_logical()
- iterate_extent_inodes()
  - btrfs_find_all_leafs()
- find_parent_nodes() // collect ref and root info
  - add_delayed_refs()
  - add_inline_refs()
  - resolve_indirect_refs()
- resolve_indirect_ref()
  - add_all_parents()

In above example, two indirect backrefs of EXTENT_DATA
((268 EXTENT_DATA 0), (268 EXTENT_DATA 4096)) will be added in rb_tree
(descending order) in add_inline_refs(). They will be resolved in
add_all_parents() in the end. However, each time it is called,
add_all_parents() will search all EXTENT_DATA items of the root from the
given offset onward which point to the target extent position.
This is needed since a backref may be referenced several times within a
file (e.g. when an extent is split), but it can produce duplicate entries.

So, in the above case, (268 EXTENT_DATA 4096) returns
"inode 268 offset 4096 root 5" and (268 EXTENT_DATA 0) returns
"inode 268 offset 0 root 5" and "inode 268 offset 4096 root 5".

Fix this problem by only searching the key with the lowest offset when
the refs' rootid and key objectid are the same. In order to do this, sort
entries in the rb_tree in ascending order (by swapping the argument order of
prelim_ref_compare() in prelim_ref_insert()) and skip the call to
resolve_indirect_ref() when rootid/key objectid matches the previous search.

With this patch, logical-resolve will return correct entries:
$ btrfs inspect-internal logical-resolve -P $((3410*4096)) /mnt
inode 268 offset 0 root 5
inode 268 offset 4096 root 5

Note that find_parent_nodes() is also used in some other places,
but they should not be affected by this patch as stated below:
 - send/scrub (through iterate_extent_inodes())
- can work correctly with or without duplicate refs
 - qgroup (through btrfs_find_all_roots())
- use only root info and not ref
 - snapshot aware defrag (through iterate_extent_inodes_from_logical())
- currently dead code and not used

Signed-off-by: Misono Tomohiro 
---
 fs/btrfs/backref.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 60f4afa8ecbc..7ce0b5f9e99e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -237,7 +237,7 @@ static void prelim_ref_insert(const struct btrfs_fs_info 
*fs_info,
while (*p) {
parent = *p;
ref = rb_entry(parent, struct prelim_ref, rbnode);
-   result = prelim_ref_compare(ref, newref);
+   result = prelim_ref_compare(newref, ref);
if (result < 0) {
p = &(*p)->rb_left;
} else if (result > 0) {
@@ -612,6 +612,8 @@ static int resolve_indirect_refs(struct btrfs_fs_info 
*fs_info,
 {
int err;
int ret = 0;
+   u64 prev_rootid = 0;
+   u64 prev_objectid = 0;
struct ulist *parents;
struct ulist_node *node;
struct ulist_iterator uiter;
@@ -640,10 +642,14 @@ static int resolve_indirect_refs(struct btrfs_fs_info 
*fs_info,
rb_erase(>rbnode, >indirect.root);
preftrees->indirect.count--;
 
-   if (ref->count == 0) {
+   if (ref->count == 0 ||
+   (prev_rootid == ref->root_id &&
+prev_objectid == ref->key_for_search.objectid)) {
free_pref(ref);
continue;
}
+   prev_rootid = ref->root_id;
+   

[PATCH 1/3] btrfs-progs: ins: logical-resolve: Set correct error value

2018-07-25 Thread Misono Tomohiro
Reset the ret value to zero after snprintf(), which returns the number
of written chars. Otherwise a non-zero value is returned even when the
command succeeds with the -P option. Also set the return value from
__ino_to_path_fd() to reflect the final status for the default behavior.

Signed-off-by: Misono Tomohiro 
---
 cmds-inspect.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cmds-inspect.c b/cmds-inspect.c
index 2fc50c1a..ac77a5ee 100644
--- a/cmds-inspect.c
+++ b/cmds-inspect.c
@@ -211,6 +211,7 @@ static int cmd_inspect_logical_resolve(int argc, char 
**argv)
error("path buffer too small: %d bytes", bytes_left);
goto out;
}
+   ret = 0;
 
for (i = 0; i < inodes->elem_cnt; i += 3) {
u64 inum = inodes->val[i];
@@ -245,7 +246,8 @@ static int cmd_inspect_logical_resolve(int argc, char 
**argv)
goto out;
}
}
-   __ino_to_path_fd(inum, path_fd, verbose, full_path);
+   ret = __ino_to_path_fd(inum, path_fd, verbose,
+   full_path);
if (path_fd != fd)
close_file_or_dir(path_fd, dirs);
} else {
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] btrfs-progs: ins: logical-resolve: Print message when path cannot be resolved

2018-07-25 Thread Misono Tomohiro
Since BTRFS_IOC_INO_PATHS requires fd of subvolume, logical-resolve
cannot find the path when mount point is not FS_TREE
(because the subvolume path cannot be opened).

In that case, print message to try -P option instead.

Signed-off-by: Misono Tomohiro 
---
 cmds-inspect.c | 34 +-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/cmds-inspect.c b/cmds-inspect.c
index ac77a5ee..21aa2903 100644
--- a/cmds-inspect.c
+++ b/cmds-inspect.c
@@ -137,16 +137,18 @@ static const char * const 
cmd_inspect_logical_resolve_usage[] = {
 static int cmd_inspect_logical_resolve(int argc, char **argv)
 {
int ret;
-   int fd;
+   int fd = -1;
int i;
int verbose = 0;
int getpath = 1;
int bytes_left;
+   u64 rootid;
struct btrfs_ioctl_logical_ino_args loi;
struct btrfs_data_container *inodes;
u64 size = 4096;
char full_path[PATH_MAX];
char *path_ptr;
+   char *mount_path = NULL;
DIR *dirstream = NULL;
 
optind = 0;
@@ -178,6 +180,35 @@ static int cmd_inspect_logical_resolve(int argc, char 
**argv)
if (!inodes)
return 1;
 
+   /* Check if mount root is FS_ROOT */
+   if (getpath) {
+   ret = find_mount_root(argv[optind + 1], _path);
+   if (ret) {
+   error("cannot find mount root: %m");
+   goto out;
+   }
+
+   fd = btrfs_open_file_or_dir(mount_path, , 1);
+   if (fd < 0) {
+   ret = 1;
+   goto out;
+   }
+
+   ret = lookup_path_rootid(fd, );
+   if (ret) {
+   error("failed to lookup root id: %m");
+   goto out;
+   }
+
+   if (rootid != BTRFS_FS_TREE_OBJECTID) {
+   ret = 1;
+printf("cannot resolve path when subvolume is mounted directly. try -P 
option\n");
+   goto out;
+   }
+
+   close_file_or_dir(fd, dirstream);
+   }
+
memset(inodes, 0, sizeof(*inodes));
loi.logical = arg_strtou64(argv[optind]);
loi.size = size;
@@ -259,6 +290,7 @@ static int cmd_inspect_logical_resolve(int argc, char 
**argv)
 out:
close_file_or_dir(fd, dirstream);
free(inodes);
+   free(mount_path);
return !!ret;
 }
 
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/3] btrfs-progs: ins: logical-resolve: Add v2 ioctl support

2018-07-25 Thread Misono Tomohiro
Add --ignore-offset option to logical-resolve command
to show how BTRFS_IOC_LOGICAL_INO_V2 ioctl works
(return every ref to the extent of given logical address).

Documentation is also updated to make meaning more clear.
See below example for more detailed explanation.

[Example]
$ mkfs.btrfs -f $DEV
$ mount $DEV /mnt

$ dd if=/dev/urandom of=/mnt/file bs=4k count=100
$ sync
// split above extent
$ dd if=/dev/urandom of=/mnt/file bs=4k seek=10 count=1 conv=notrunc
$ sync

$ filefrag -v /mnt/file
Filesystem type is: 9123683e
File size of /mnt/file is 409600 (100 blocks of 4096 bytes)
 ext: logical_offset:physical_offset: length:   expected: flags:
   0:0..   9:   3392..  3401: 10:
   1:   10..  10:   3328..  3328:  1:   3402:
   2:   11..  99:   3403..  3491: 89:   3329: last,eof
/mnt/file: 3 extents found

// Actually extent 0 and 2 point to the same extent (with different offset):
$ btrfs inspect-internal dump-tree $DEV | \
grep -A 2 "$((3392*4096)) EXTENT_ITEM"
  item 2 key (13893632 EXTENT_ITEM 409600) itemoff 16153 itemsize 53
  refs 2 gen 7 flags DATA
  extent data backref root FS_TREE objectid 257 offset 0 count 2

$ btrfs inspect-internal dump-tree $DEV | \
grep -A 4 "257 EXTENT_DATA"
item 7 key (257 EXTENT_DATA 0) itemoff 15733 itemsize 53
generation 7 type 1 (regular)
extent data disk byte 13893632 nr 409600
extent data offset 0 nr 40960 ram 409600
extent compression 0 (none)
item 8 key (257 EXTENT_DATA 40960) itemoff 15680 itemsize 53
generation 8 type 1 (regular)
extent data disk byte 13631488 nr 4096
extent data offset 0 nr 4096 ram 4096
extent compression 0 (none)
item 9 key (257 EXTENT_DATA 45056) itemoff 15627 itemsize 53
generation 7 type 1 (regular)
extent data disk byte 13893632 nr 409600
extent data offset 45056 nr 364544 ram 409600
extent compression 0 (none)

// v1 ioctl only returns the refs pointing given address block
$ btrfs inspect-internal logical-resolve -P $((3392*4096)) /mnt
inode 257 offset 0 root 5
$ btrfs inspect-internal logical-resolve -P $((3403*4096)) /mnt
inode 257 offset 45056 root 5

// v2 ioctl returns all refs pointing at least one block of given extent
$ btrfs inspect-internal logical-resolve -P --ignore-offset \
  $((3392*4096)) /mnt
inode 257 offset 0 root 5
inode 257 offset 45056 root 5

Signed-off-by: Misono Tomohiro 
---
 v1 -> v2
   - add explanation
   - add build assert
   - use long option instead of -i

 Documentation/btrfs-inspect-internal.asciidoc | 15 +++---
 cmds-inspect.c| 29 ---
 ioctl.h   | 11 +-
 libbtrfsutil/btrfs.h  | 10 -
 4 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/Documentation/btrfs-inspect-internal.asciidoc 
b/Documentation/btrfs-inspect-internal.asciidoc
index e2db6466..5f80323e 100644
--- a/Documentation/btrfs-inspect-internal.asciidoc
+++ b/Documentation/btrfs-inspect-internal.asciidoc
@@ -112,19 +112,28 @@ at 'path', ie. all hardlinks
 -v
 verbose mode, print count of returned paths and ioctl() return value
 
-*logical-resolve* [-Pv] [-s ]  ::
+*logical-resolve* [options]  ::
 (needs root privileges)
 +
-resolve paths to all files at given 'logical' address in the linear filesystem 
space
+get information on all inodes whose extents contain the given 'logical'
+address block and then resolve each filesystem path
 +
 `Options`
 +
 -P
-skip the path resolving and print the inodes instead
+skip the path resolving and print the inode number, owner's subvolume id
+and offset in the extent
 -v
 verbose mode, print count of returned paths and all ioctl() return values
 -s 
 set internal buffer for storing the file names to 'bufsize', default is 4096, 
maximum 64k
+--ignore-offset
+return all inode information which points to the extent
+containing the given logical address block regardless of offset.
+i.e. with this option, all files which includes at least one block of
+the extent will be shown, even if given logical address block is not
+included
+This requires version 2 ioctl support (BTRFS_IOC_LOGICAL_INO_V2, since 4.15)
 
 *min-dev-size* [options] ::
 (needs root privileges)
diff --git a/cmds-inspect.c b/cmds-inspect.c
index 21aa2903..c261e773 100644
--- a/cmds-inspect.c
+++ b/cmds-inspect.c
@@ -124,13 +124,20 @@ static int cmd_inspect_inode_resolve(int argc, char 
**argv)
 }
 
 static const char * const cmd_inspect_logical_resolve_usage[] = {
-   "btrfs inspect-internal logical-resolve [-Pv] [-s bufsize]  
",
+   "btrfs inspect-internal logical-resolve [options]  ",
"Get

[PATCH 0/3] btrfs-progs: ins: Update for logical-resolve command

2018-07-25 Thread Misono Tomohiro
Patches 1 and 2 fix some issues in logical-resolve.
Patch 3 is the main part and adds support for BTRFS_IOC_LOGICAL_INO_V2.

Misono Tomohiro (3):
  btrfs-progs: ins: logical-resolve: Set correct error value
  btrfs-progs: ins: logical-resolve: Print message when path cannot be
resolved
  btrfs-progs: ins: logical-resolve: Add v2 ioctl support

 Documentation/btrfs-inspect-internal.asciidoc | 15 --
 cmds-inspect.c| 67 +--
 ioctl.h   | 11 -
 libbtrfsutil/btrfs.h  | 10 +++-
 4 files changed, 93 insertions(+), 10 deletions(-)

-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs-progs: ins: Add v2 ioctl support in logical-resolve

2018-07-18 Thread Misono Tomohiro
Add -i (ignore offset) option to logical-resolve command
to show how BTRFS_IOC_LOGICAL_INO_V2 ioctl works
(returns every ref to the extent of given logical address).

[Example]
$ mkfs.btrfs -f $DEV
$ mount $DEV /mnt

$ dd if=/dev/urandom of=/mnt/file bs=4k count=100
# split above extent
$ dd if=/dev/urandom of=/mnt/file bs=4k seek=10 count=1 conv=notrunc
$ btrfs filesystem sync

# v1
$ btrfs inspect-internal logical-resolve -P 13631488 /mnt
inode 257 offset 0 root 5

# v2
$ btrfs inspect-internal logical-resolve -iP 13631488 /mnt
inode 257 offset 0 root 5
inode 257 offset 45056 root 5

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-inspect-internal.asciidoc |  4 
 cmds-inspect.c| 17 +++--
 ioctl.h   | 10 +-
 libbtrfsutil/btrfs.h  | 10 +-
 4 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/Documentation/btrfs-inspect-internal.asciidoc 
b/Documentation/btrfs-inspect-internal.asciidoc
index e2db6466..a55c9add 100644
--- a/Documentation/btrfs-inspect-internal.asciidoc
+++ b/Documentation/btrfs-inspect-internal.asciidoc
@@ -125,6 +125,10 @@ skip the path resolving and print the inodes instead
 verbose mode, print count of returned paths and all ioctl() return values
 -s 
 set internal buffer for storing the file names to 'bufsize', default is 4096, 
maximum 64k
+-i
+ignore offset and return all the ref information
+which points to the extent containing given logical address.
+This requires version 2 ioctl support (BTRFS_IOC_LOGICAL_INO_V2, since 4.15).
 
 *min-dev-size* [options] ::
 (needs root privileges)
diff --git a/cmds-inspect.c b/cmds-inspect.c
index 2fc50c1a..d47eeacb 100644
--- a/cmds-inspect.c
+++ b/cmds-inspect.c
@@ -131,6 +131,9 @@ static const char * const 
cmd_inspect_logical_resolve_usage[] = {
"-s bufsize  set inode container's size. This is used to increase 
inode",
"container's size in case it is not enough to read all the 
",
"resolved results. The max value one can set is 64k",
+   "-i  ignore offset and return all the ref information",
+   "which points to the extent containing given logical 
address",
+   "(requires version 2 ioctl support)",
NULL
 };
 
@@ -142,7 +145,9 @@ static int cmd_inspect_logical_resolve(int argc, char 
**argv)
int verbose = 0;
int getpath = 1;
int bytes_left;
+   int ignore_offset = 0;
struct btrfs_ioctl_logical_ino_args loi;
+   unsigned long ioctl_num = BTRFS_IOC_LOGICAL_INO;
struct btrfs_data_container *inodes;
u64 size = 4096;
char full_path[PATH_MAX];
@@ -151,7 +156,7 @@ static int cmd_inspect_logical_resolve(int argc, char 
**argv)
 
optind = 0;
while (1) {
-   int c = getopt(argc, argv, "Pvs:");
+   int c = getopt(argc, argv, "Pvs:i");
if (c < 0)
break;
 
@@ -165,6 +170,9 @@ static int cmd_inspect_logical_resolve(int argc, char 
**argv)
case 's':
size = arg_strtou64(optarg);
break;
+   case 'i':
+   ignore_offset = 1;
+   break;
default:
usage(cmd_inspect_logical_resolve_usage);
}
@@ -183,13 +191,18 @@ static int cmd_inspect_logical_resolve(int argc, char 
**argv)
loi.size = size;
loi.inodes = ptr_to_u64(inodes);
 
+   if (ignore_offset) {
+   loi.flags = BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
+   ioctl_num = BTRFS_IOC_LOGICAL_INO_V2;
+   }
+
fd = btrfs_open_dir(argv[optind + 1], , 1);
if (fd < 0) {
ret = 12;
goto out;
}
 
-   ret = ioctl(fd, BTRFS_IOC_LOGICAL_INO, );
+   ret = ioctl(fd, ioctl_num, );
if (ret < 0) {
error("logical ino ioctl: %m");
goto out;
diff --git a/ioctl.h b/ioctl.h
index 709e996f..74f30c20 100644
--- a/ioctl.h
+++ b/ioctl.h
@@ -491,10 +491,16 @@ BUILD_ASSERT(sizeof(struct btrfs_ioctl_ino_path_args) == 
56);
 struct btrfs_ioctl_logical_ino_args {
__u64   logical;/* in */
__u64   size;   /* in */
-   __u64   reserved[4];
+   __u64   reserved[3];/* must be 0 for now */
+   __u64   flags;  /* in, v2 only */
/* struct btrfs_data_container  *inodes;out   */
__u64   inodes;
 };
+/*
+ * Return every ref to the extent, not just those containing logical block.
+ * Requires logical == extent bytenr.
+ */
+#define BTR

Re: [PATCH 2/2] btrfs-progs: check: enhanced progress indicator

2018-07-16 Thread Misono Tomohiro
On 2018/07/05 4:20, Stéphane Lesimple wrote:
> We reuse the task_position enum and task_ctx struct of the original progress
> indicator, adding more values and fields for our needs.
> 
> Then add hooks in all steps of the check to properly record progress.
> 
> Signed-off-by: Stéphane Lesimple 
> ---
>  check/main.c| 176 
> ++--
>  check/mode-common.h |  20 ++
>  check/mode-lowmem.c |   1 +
>  convert/main.c  |   2 +-
>  qgroup-verify.c |   7 +++
>  qgroup-verify.h |   2 +
>  task-utils.c|   8 ++-
>  task-utils.h|   3 +-
>  8 files changed, 154 insertions(+), 65 deletions(-)
> 
> diff --git a/check/main.c b/check/main.c
> index 3190b5d..bb3ebea 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -25,6 +25,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "ctree.h"
>  #include "volumes.h"
>  #include "repair.h"
> @@ -47,20 +48,6 @@
>  #include "check/mode-original.h"
>  #include "check/mode-lowmem.h"
>  
> -enum task_position {
> - TASK_EXTENTS,
> - TASK_FREE_SPACE,
> - TASK_FS_ROOTS,
> - TASK_NOTHING, /* have to be the last element */
> -};
> -
> -struct task_ctx {
> - int progress_enabled;
> - enum task_position tp;
> -
> - struct task_info *info;
> -};
> -
>  u64 bytes_used = 0;
>  u64 total_csum_bytes = 0;
>  u64 total_btree_bytes = 0;
> @@ -72,6 +59,7 @@ u64 data_bytes_referenced = 0;
>  LIST_HEAD(duplicate_extents);
>  LIST_HEAD(delete_items);
>  int no_holes = 0;
> +static int is_free_space_tree = 0;
>  int init_extent_tree = 0;
>  int check_data_csum = 0;
>  struct btrfs_fs_info *global_info;
> @@ -173,28 +161,48 @@ static int compare_extent_backref(struct rb_node 
> *node1, struct rb_node *node2)
>   return compare_tree_backref(node1, node2);
>  }
>  
> +static void print_status_check_line(void *p)
> +{
> + struct task_ctx *priv = p;
> + char *task_position_string[] = {
> + "[1/7] checking root items ",
> + "[2/7] checking extents",
> + is_free_space_tree ?
> + "[3/7] checking free space tree":
> + "[3/7] checking free space cache   ",
> + "[4/7] checking fs roots   ",
> + check_data_csum ?
> + "[5/7] checking csums against data ":
> + "[5/7] checking csums (without verifying data) ",
> + "[6/7] checking root refs  ",
> + "[7/7] checking quota groups   ",
> + };
> +
> + time_t elapsed = time(NULL) - priv->start_time;
> + int hours   = elapsed / 3600;
> + elapsed-= hours   * 3600;
> + int minutes = elapsed / 60;
> + elapsed-= minutes * 60;
> + int seconds = elapsed;
> + printf("%s (%d:%02d:%02d elapsed", task_position_string[priv->tp], 
> hours, minutes, seconds);
> + if (priv->item_count > 0)
> + printf(", %llu items checked)\r", priv->item_count);
> + else
> + printf(")\r");
> + fflush(stdout);
> +}
>  
>  static void *print_status_check(void *p)
>  {
>   struct task_ctx *priv = p;
> - const char work_indicator[] = { '.', 'o', 'O', 'o' };
> - uint32_t count = 0;
> - static char *task_position_string[] = {
> - "checking extents",
> - "checking free space cache",
> - "checking fs roots",
> - };
>  
> - task_period_start(priv->info, 1000 /* 1s */);
> + task_period_start(priv->info, 50);
>  
>   if (priv->tp == TASK_NOTHING)
>   return NULL;
>  
>   while (1) {
> - printf("%s [%c]\r", task_position_string[priv->tp],
> - work_indicator[count % 4]);
> - count++;
> - fflush(stdout);
> + print_status_check_line(p);
>   task_period_wait(priv->info);
>   }
>   return NULL;
> @@ -202,6 +210,7 @@ static void *print_status_check(void *p)
>  
>  static int print_status_return(void *p)
>  {
> + print_status_check_line(p);
>   printf("\n");
>   fflush(stdout);
>  
> @@ -2942,6 +2951,7 @@ static int check_root_refs(struct btrfs_root *root,
>   loop = 0;
>   cache = search_cache_extent(root_cache, 0);
>   while (1) {
> + ctx.item_count++;
>   if (!cache)
>   break;
>   rec = container_of(cache, struct root_record, cache);
> @@ -3263,6 +3273,7 @@ static int check_fs_root(struct btrfs_root *root,
>   }
>  
>   while (1) {
> + ctx.item_count++;
>   wret = walk_down_tree(root, , wc, , );
>   if (wret < 0)
>   ret = wret;
> @@ -3340,11 +3351,6 @@ static int check_fs_roots(struct btrfs_fs_info 
> *fs_info,
>   int 

Re: [PATCH v2 00/20] btrfs-progs: Rework of "subvolume list/show" and relax the root privileges of them

2018-07-04 Thread Misono Tomohiro
Gentle ping, as this is related to the new ioctls merged in 4.18-rc1.

On 2018/06/18 17:40, Misono Tomohiro wrote:
> Changelog
>  
>  v1 -> v2: 
>   generally update whole patch set, especially:
>- rebased to progs 4.17
>- Improve error handling
>- Update man/help/commit message
>- Add/Update several options of sub list:
>   -f ... follow mounted subvolumes
>   -a ... remove meaningless filter
>   -A ... print path in absolute path
>   --nosort ... output results incrementally
>  Please see below examples
> =
> github:  https://github.com/t-msn/btrfs-progs/tree/rework-sub-list
> 
> Hello,
> 
> This series requires some new ioctls which are now in kernel 4.18-rc1. 
> 
> The aim of this series is to relax the root privileges of "sub list/show"
> while keeping as much output consistency between root and non-privileged
> user. For "subvolume list", default output has been changed from current
> btrfs-progs (in both old and new kernel) and some options are newly added.
> For "subvolume show", root's output is the same as before but there are
> some differences from non-privileged user's output.
> 
> Please see below examples.
> 
> 
> * Behavior summary of new "sub list"
>   - default (no option)
> - lists subvolumes below the specified path (inc. path itself)
> - If new ioctls exist
>   - the path can be non-subvolume directory
>   - non-privileged user can call it
> (subvolumes to which the user cannot access will be skipped)
> 
>   - -f
> - follow mounted subvolume below the specified path and list them too 
>   (only if it is the same filesystem)
> 
>   - -a
> - updated to remove filter. i.e. the output is the same as current progs
>   without option (require root privileges)
> 
>   - -A
> - print path in absolute path
> 
>   - --nosort
> - output results incrementally without loading information to memory
> 
>  [Example]
>   $ mkfs.btrfs -f $DEV
>   $ mkfs.btrfs -f $DEV2
>   $ mount $DEV $MNT
> 
>   $ btrfs subvolume create $MNT/AAA
>   $ btrfs subvolume create $MNT/BBB
>   $ btrfs subvolume create $MNT/CCC
>   $ btrfs subvolume create $MNT/DDD
>   $ mkdir $MNT/AAA/bbb
>   $ mkdir $MNT/AAA/ccc
>   $ mkdir $MNT/AAA/other
> 
>   $ umount $MNT
>   $ mount -o subvol=AAA $DEV $MNT
>   $ mount -o subvol=BBB $DEV $MNT/bbb
>   $ mount -o subvol=CCC $DEV $MNT/ccc
>   $ mount $DEV2 $MNT/other
> 
>   $ btrfs subvolume list $MNT # print subvolumes below the path
>   ID 256 gen 10 top level 5 path .
> 
>   $ btrfs subvolume list -A $MNT # print path in absolute path
>   ID 256 gen 10 top level 5 path /mnt
> 
>   $ btrfs subvolume list -f $MNT # follow mounted subvolumes too
>   ID 256 gen 10 top level 5 path .
>   ID 258 gen 7 top level 5 path bbb
>   ID 259 gen 8 top level 5 path ccc
> 
>   $ btrfs subvolume list -a $MNT
>   # print all subvolumes in the fs. same output as progs<=4.17 without option
>   ID 256 gen 10 top level 5 path AAA
>   ID 258 gen 7 top level 5 path BBB
>   ID 259 gen 8 top level 5 path CCC
>   ID 260 gen 9 top level 5 path DDD
> 
>  More details are in each commit log.
> 
> 
> * Behavior summary of new "sub show"
>   - No change for root's output
>   - If new ioctls exist, non-privileged user can call it
> - In that case, path to be shown is absolute path
>   (for root, it is relative to top-level subvolume)
>   Also, snapshots to be shown are to which the user can
>   access from current mount point.
>   (for root, all snapshots in the fs)
> 
> 
> * Patch structure
> The first several patches update libbtrfsutil and the latter patches update
> sub list/show command.
> 
>  1st patch is independent and updates man doc of btrfs-subvolume
> 
>  2nd-6th update the libbtrfsutil using new ioctls:
>- Relax the privileges of following functions if kernel supports new
>  ioctls and @top/@id is zero (i.e. the given path/fd is used instead
>  of arbitrary subvolume id).
>  - util_subvolume_info()
>  - subvolume iterator related ones (util_subvolume_iterator_next() etc.)
> 
>- For subvolume iterator, if kernel supports new ioctls and @top is zero,
>  non-subvolume directory can be specified as a start point. Also,
>  subvolume which cannot be accessed (either because of permission
>  error or not found (may happen if other volume is mounted in the
>  path) will be skipped for non-privileged user.
> 
>- Code path of root and non-privileged user is different. While root uses
>  TREE_SEARCH ioctl as before, non-privileged user 

[PATCH] btrfs-progs: doc: Update man 5 btrfs for 4.18

2018-07-03 Thread Misono Tomohiro
Update the information to reflect the status of 4.18

Main Updates:
- Add explanation of improved compression heuristic algorithm
- Add explanation that norecovery == nologreplay
- Add explanation of nossd_spread mount option
- Add explanation of rmdir_subvol feature

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-man5.asciidoc | 25 ++---
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/Documentation/btrfs-man5.asciidoc 
b/Documentation/btrfs-man5.asciidoc
index 0529496a..e85b4d91 100644
--- a/Documentation/btrfs-man5.asciidoc
+++ b/Documentation/btrfs-man5.asciidoc
@@ -138,8 +138,9 @@ Otherwise some simple heuristics are applied to detect an 
incompressible file.
 If the first blocks written to a file are not compressible, the whole file is
 permanently marked to skip compression. As this is too simple, the
 'compress-force' is a workaround that will compress most of the files at the
-cost of some wasted CPU cycles on failed attempts. The heuristics of 'compress'
-will improve in the future so this will not be necessary.
+cost of some wasted CPU cycles on failed attempts.
+Since kernel 4.15, heuristic algorithm has been improved by using
+frequency sampling, repeated pattern detection and shannon entropy calculation.
 +
 NOTE: If compression is enabled, 'nodatacow' and 'nodatasum' are disabled.
 
@@ -300,6 +301,7 @@ which will effectively start the inode numbers from the 
beginning again.
 (default: on, even read-only)
 +
 Enable/disable log replay at mount time. See also 'treelog'.
+Note that 'nologreplay' is the same as 'norecovery'.
 +
 WARNING: currently, the tree log is replayed even with a read-only mount! To
 disable that behaviour, mount also with 'nologreplay'.
@@ -336,7 +338,8 @@ inlined files).
 (since: 4.5, default: off)
 +
 Do not attempt any data recovery at mount time. This will disable 'logreplay'
-and avoids other write operations.
+and avoids other write operations. Note that this option is the same as
+'nologreplay'
 +
 NOTE: The opposite option 'recovery' used to have different meaning but was
 changed for consistency with other filesystems, where 'norecovery' is used for
@@ -388,12 +391,13 @@ chosen, which is 'v1'.
 *ssd*::
 *ssd_spread*::
 *nossd*::
+*nossd_spread*::
 (default: SSD autodetected)
 +
 Options to control SSD allocation schemes.  By default, BTRFS will
 enable or disable SSD optimizations depending on status of a device with
 respect to rotational or non-rotational type. This is determined by the
-contents of '/sys/block/DEV/queue/rotational'). If it is 1, the 'ssd' option is
+contents of '/sys/block/DEV/queue/rotational'). If it is 0, the 'ssd' option is
 turned on.  The option 'nossd' will disable the autodetection.
 +
 The optimizations make use of the absence of the seek penalty that's inherent
@@ -411,7 +415,7 @@ fragmentation. The layout tuning has been kept intact for 
the option
 The 'ssd_spread' mount option attempts to allocate into bigger and aligned
 chunks of unused space, and may perform better on low-end SSDs.  'ssd_spread'
 implies 'ssd', enabling all other SSD heuristics as well. The option 'nossd'
-will disable all SSD options.
+will disable all SSD options while 'nossd_spread' only disables 'ssd_spread'.
 
 *subvol='path'*::
 Mount subvolume from 'path' rather than the toplevel subvolume. The
@@ -470,7 +474,9 @@ root user can do that.
 NOTE: historically, any user could create a snapshot even if he was not owner
 of the source subvolume, the subvolume deletion has been restricted for that
 reason. The subvolume creation has been restricted but this mount option is
-still required. This is a usability issue and will be addressed in the future.
+still required. This is a usability issue and therefore
+since 4.18, rmdir syscall can delete an empty subvolume just like an ordinary
+directory (See 'rmdir_subvol' feature in 'FILESYSTEM FEATURES').
 
 DEPRECATED MOUNT OPTIONS
 
@@ -605,12 +611,17 @@ stored as an extent, saves a few percent of metadata if 
sparse files are used
 +
 the filesystem contains or contained a raid56 profile of block groups
 
+*rmdir_subvol*::
+(since: 4.18)
++
+indicate that rmdir syscall can delete an empty subvolume just like an ordinary
+directory. Note that this feature only depends on the kernel version.
+
 *skinny_metadata*::
 (since: 3.10)
 +
 reduced-size metadata for extent references, saves a few percent of metadata
 
-
 FILESYSTEM LIMITS
 -
 
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: qgroups: Move transaction managed inside btrfs_quota_enable

2018-07-03 Thread Misono Tomohiro



On 2018/07/02 20:00, Nikolay Borisov wrote:
> Commit 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to
> btrfs_quota_enable") not only resulted in an easier to follow code but
> it also introduced a subtle bug. It changed the timing when the initial
> transaction rescan was happening - before the commit it would happen
> after transaction commit had occurred but after the commit it might happen
> before the transaction was committed. This results in failure to
> correctly rescan the quota since there could be data which is still not
> committed on disk.
> 
> This patch aims to fix this by moving the transaction creation/commit
> inside btrfs_quota_enable, which allows to schedule the quota commit
> after the transaction has been committed.
> 
> Fixes: 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to 
> btrfs_quota_enable")
> Link: https://marc.info/?l=linux-btrfs&m=152999289017582
> Reported-by: Misono Tomohiro 
> Reviewed-by: Misono Tomohiro 
> Signed-off-by: Nikolay Borisov 
> ---
>  fs/btrfs/ioctl.c  | 15 ++-
>  fs/btrfs/qgroup.c | 38 +++---
>  fs/btrfs/qgroup.h |  6 ++
>  3 files changed, 35 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index a399750b9e41..316fb1af15e2 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -5135,9 +5135,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, 
> void __user *arg)
>   struct inode *inode = file_inode(file);
>   struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>   struct btrfs_ioctl_quota_ctl_args *sa;
> - struct btrfs_trans_handle *trans = NULL;
>   int ret;
> - int err;
>  
>   if (!capable(CAP_SYS_ADMIN))
>   return -EPERM;
> @@ -5153,28 +5151,19 @@ static long btrfs_ioctl_quota_ctl(struct file *file, 
> void __user *arg)
>   }
>  
>   down_write(_info->subvol_sem);
> - trans = btrfs_start_transaction(fs_info->tree_root, 2);
> - if (IS_ERR(trans)) {
> - ret = PTR_ERR(trans);
> - goto out;
> - }
>  
>   switch (sa->cmd) {
>   case BTRFS_QUOTA_CTL_ENABLE:
> - ret = btrfs_quota_enable(trans, fs_info);
> + ret = btrfs_quota_enable(fs_info);
>   break;
>   case BTRFS_QUOTA_CTL_DISABLE:
> - ret = btrfs_quota_disable(trans, fs_info);
> + ret = btrfs_quota_disable(fs_info);
>   break;
>   default:
>   ret = -EINVAL;
>   break;
>   }
>  
> - err = btrfs_commit_transaction(trans);
> - if (err && !ret)
> - ret = err;
> -out:
>   kfree(sa);
>   up_write(_info->subvol_sem);
>  drop_write:
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index c25dc47210a3..1012c7138633 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -875,8 +875,7 @@ static int btrfs_clean_quota_tree(struct 
> btrfs_trans_handle *trans,
>   return ret;
>  }
>  
> -int btrfs_quota_enable(struct btrfs_trans_handle *trans,
> -struct btrfs_fs_info *fs_info)
> +int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
>  {
>   struct btrfs_root *quota_root;
>   struct btrfs_root *tree_root = fs_info->tree_root;
> @@ -886,6 +885,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>   struct btrfs_key key;
>   struct btrfs_key found_key;
>   struct btrfs_qgroup *qgroup = NULL;
> + struct btrfs_trans_handle *trans = NULL;
>   int ret = 0;
>   int slot;
>  
> @@ -893,6 +893,12 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>   if (fs_info->quota_root)
>   goto out;
>  
> + trans = btrfs_start_transaction(tree_root, 2);
> + if (IS_ERR(trans)) {
> + ret = PTR_ERR(trans);
> + goto out;
> + }
> +
fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
>   if (!fs_info->qgroup_ulist) {
>   ret = -ENOMEM;
> @@ -987,6 +993,11 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>   fs_info->quota_root = quota_root;
>   set_bit(BTRFS_FS_QUOTA_ENABLED, _info->flags);
>   spin_unlock(_info->qgroup_lock);
> +
> + ret = btrfs_commit_transaction(trans);
> + if (ret)
> + goto out_free_path;
> +

I realized that some error paths also need to finish transaction (continue to 
below). 

>   ret = qgroup_rescan_init(fs_info, 0, 1);
>   if (!ret) {
>   qgroup_rescan_zero_tracking(fs_info);
> @@ -1011,15 +1022,22 @@ int btrfs_quota_enable(

Re: Enabling quota may not correctly rescan on 4.17

2018-07-02 Thread Misono Tomohiro
> Misono,
> 
> Can you please try the attached patch?
> 

I tried and it works (on 4.18.0-rc3).

Committing transaction before starting rescan worker is
what btrfs_qgroup_rescan() does, so it looks fine.

(though I'm not sure why you don't see the problem in your machine.)

Reviewed-by: Misono Tomohiro 
Thanks.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Enabling quota may not correctly rescan on 4.17

2018-06-28 Thread Misono Tomohiro
On 2018/06/28 16:12, Qu Wenruo wrote:
> 
> 
> On 2018年06月27日 16:25, Misono Tomohiro wrote:
>> On 2018/06/27 17:10, Qu Wenruo wrote:
>>>
>>>
>>> On 2018年06月26日 14:00, Misono Tomohiro wrote:
>>>> Hello Nikolay,
>>>>
>>>> I noticed that commit 5d23515be669 ("btrfs: Move qgroup rescan
>>>> on quota enable to btrfs_quota_enable") in 4.17 sometimes causes
>>>> to fail correctly rescanning quota when quota is enabled.
>>>>
>>>> Simple reproducer:
>>>>
>>>> $ mkfs.btrfs -f $DEV
>>>> $ mount $DEV /mnt
>>>> $ dd if=/dev/urandom of=/mnt/file bs=1000 count=1000
>>>> $ btrfs quota enable /mnt
>>>> $ umount /mnt
>>>> $ btrfs check $DEV
>>>> ...
> Unfortunately in my environment, btrfs/114 failed to reproduce it with
> 1024 runs overnight, with v4.18-rc1 kernel.
> 
> Would you please provide the whole btrfs-image dump of the corrupted fs?

Yes.
The attached file is an image-dump of above reproducer (kernel 4.17.0, progs 
4.17)
as the dump of btrfs/114 is a bit large for mail.

Though this does not always happen, I see the failure on both 4.17.0 and
4.18-rc2.

Thanks,
Tomohiro Misono

> 
> There are several different assumptions on how the bug happens, with
> your btrfs-image dump, it would help a lot to rule out some assumption.
> 
> Thanks,
> Qu


btrfs-image
Description: Binary data


Re: [PATCH v1] btrfs: quota: Set rescan progress to (u64)-1 if we hit last leaf

2018-06-27 Thread Misono Tomohiro
On 2018/06/27 19:19, Qu Wenruo wrote:
> Commit ff3d27a048d9 ("btrfs: qgroup: Finish rescan when hit the last leaf
> of extent tree") added a new exit for rescan finish.
> 
> However after finishing quota rescan, we set
> fs_info->qgroup_rescan_progress to (u64)-1 before we exit through the
> original exit path.
> While we missed that assignment of (u64)-1 in the new exit path.
> 
> The end result is, the quota status item doesn't have the same value.
> (-1 vs the last bytenr + 1)
> Although it doesn't affect quota accounting, it's still better to keep
> the original behavior.
> 
> Reported-by: Misono Tomohiro 
> Fixes: ff3d27a048d9 ("btrfs: qgroup: Finish rescan when hit the last leaf of 
> extent tree")
> Signed-off-by: Qu Wenruo 
> ---
> changelog:
> v2:
>   Commit message update, as the bug only changes the resulting quota status
>   item without impacting the behavior.


Reviewed-by: Misono Tomohiro 

(As you said, the problem I reported in 
https://marc.info/?t=15299930357=1=2 is not related to this change)

Thanks,
Tomohiro Misono

> ---
>  fs/btrfs/qgroup.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index 1874a6d2e6f5..99f2b9ce0f15 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -2680,8 +2680,10 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, 
> struct btrfs_path *path,
>   free_extent_buffer(scratch_leaf);
>   }
>  
> - if (done && !ret)
> + if (done && !ret) {
>   ret = 1;
> + fs_info->qgroup_rescan_progress.objectid = (u64)-1;
> + }
>   return ret;
>  }
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Enabling quota may not correctly rescan on 4.17

2018-06-27 Thread Misono Tomohiro
On 2018/06/27 17:22, Nikolay Borisov wrote:
> 
> 
> On 27.06.2018 11:20, Misono Tomohiro wrote:
>> I can see the failure with or without options...
>> maybe it depends on machine spec?
> 
> I'm testing in a virtual machine: 
> 
> qemu-system-x86_64 -smp 6 -kernel 
> /home/nborisov/projects/kernel/source/arch/x86_64/boot/bzImage -append 
> root=/dev/vda rw -enable-kvm -m 4096 -drive 
> file=/home/nborisov/projects/qemu/rootfs/ubuntu15.img,if=virtio -virtfs 
> local,id=fsdev1,path=/mnt/vm_share,security_model=passthrough,mount_tag=hostshare
>  -drive file=/home/nborisov/scratch/scratch-images/btrfs-test.img,if=virtio 
> -drive file=/home/nborisov/scratch/scratch-images/scratch2.img,if=virtio 
> -drive file=/home/nborisov/scratch/scratch-images/scratch3.img,if=virtio 
> -drive file=/home/nborisov/scratch/scratch-images/scratch4.img,if=virtio 
> -drive file=/home/nborisov/scratch/scratch-images/scratch5.img,if=virtio 
> -drive file=/home/nborisov/scratch/scratch-images/scratch6.img,if=virtio 
> -redir tcp:1235::22 -daemonize
> 
> Perhaps it's not visible on slow storage. Are you testing on NVME or 
> something like that? 

No, I use sata ssd and hdd and the problem can be seen on both.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Enabling quota may not correctly rescan on 4.17

2018-06-27 Thread Misono Tomohiro
On 2018/06/27 17:10, Qu Wenruo wrote:
> 
> 
> On 2018年06月26日 14:00, Misono Tomohiro wrote:
>> Hello Nikolay,
>>
>> I noticed that commit 5d23515be669 ("btrfs: Move qgroup rescan
>> on quota enable to btrfs_quota_enable") in 4.17 sometimes causes
>> to fail correctly rescanning quota when quota is enabled.
>>
>> Simple reproducer:
>>
>> $ mkfs.btrfs -f $DEV
>> $ mount $DEV /mnt
>> $ dd if=/dev/urandom of=/mnt/file bs=1000 count=1000
>> $ btrfs quota enable /mnt
>> $ umount /mnt
>> $ btrfs check $DEV
>> ...
>> checking quota groups
>> Counts for qgroup id: 0/5 are different
>> our:referenced 1019904 referenced compressed 1019904
>> disk:   referenced 16384 referenced compressed 16384
>> diff:   referenced 1003520 referenced compressed 1003520
>> our:exclusive 1019904 exclusive compressed 1019904
>> disk:   exclusive 16384 exclusive compressed 16384
>> diff:   exclusive 1003520 exclusive compressed 1003520
>> found 1413120 bytes used, error(s) found
>> ...
>>
>> This can be also observed in btrfs/114. (Note that progs < 4.17
>> returns error code 0 even if quota is not consistent and therefore
>> test will incorrectly pass.)
> 
> BTW, would you please try to dump the quota tree for such mismatch case?
> 
> It could be a btrfs-progs bug which it should skip quota checking if it
> found the quota status item has RESCAN flag.

Yes, this is what I see after running btrfs/114 (/dev/sdh1 is scratch dev):

$ sudo btrfs check -Q /dev/sdh1
Checking filesystem on /dev/sdh1
UUID: d07f6028-0ae7-40d4-ac45-01a4505ddcfb
Print quota groups for /dev/sdh1
UUID: d07f6028-0ae7-40d4-ac45-01a4505ddcfb
Counts for qgroup id: 0/5 are different
our:referenced 170999808 referenced compressed 170999808
disk:   referenced 16384 referenced compressed 16384
diff:   referenced 170983424 referenced compressed 170983424
our:exclusive 170999808 exclusive compressed 170999808
disk:   exclusive 16384 exclusive compressed 16384
diff:   exclusive 170983424 exclusive compressed 170983424


$ sudo btrfs inspect-internal dump-tree -t quota /dev/sdh1
btrfs-progs v4.17
quota tree key (QUOTA_TREE ROOT_ITEM 0)
leaf 213958656 items 3 free space 16096 generation 9 owner QUOTA_TREE
leaf 213958656 flags 0x1(WRITTEN) backref revision 1
fs uuid d07f6028-0ae7-40d4-ac45-01a4505ddcfb
chunk uuid 78d753d0-eeb7-4c3e-b825-b6c2c5de5c7a
item 0 key (0 QGROUP_STATUS 0) itemoff 16251 itemsize 32
version 1 generation 9 flags ON scan 30572545
item 1 key (0 QGROUP_INFO 0/5) itemoff 16211 itemsize 40
generation 7
referenced 16384 referenced_compressed 16384
exclusive 16384 exclusive_compressed 16384
item 2 key (0 QGROUP_LIMIT 0/5) itemoff 16171 itemsize 40
flags 0
max_referenced 0 max_exclusive 0
rsv_referenced 0 rsv_exclusive 0
total bytes 26843545600
bytes used 171769856
uuid d07f6028-0ae7-40d4-ac45-01a4505ddcfb


And if I mount+rescan again:

$ sudo mount /dev/sdh1 /mnt
$ sudo btrfs quota rescan -w /mnt
$ sudo umount /mnt

$ sudo btrfs check -Q /dev/sdh1
Checking filesystem on /dev/sdh1
UUID: d07f6028-0ae7-40d4-ac45-01a4505ddcfb
Print quota groups for /dev/sdh1
UUID: d07f6028-0ae7-40d4-ac45-01a4505ddcfb
Counts for qgroup id: 0/5
our:referenced 170999808 referenced compressed 170999808
disk:   referenced 170999808 referenced compressed 170999808
our:exclusive 170999808 exclusive compressed 170999808
disk:   exclusive 170999808 exclusive compressed 170999808

$ sudo btrfs inspect-internal dump-tree -t quota /dev/sdh1
btrfs-progs v4.17
quota tree key (QUOTA_TREE ROOT_ITEM 0)
leaf 31309824 items 3 free space 16096 generation 13 owner QUOTA_TREE
leaf 31309824 flags 0x1(WRITTEN) backref revision 1
fs uuid d07f6028-0ae7-40d4-ac45-01a4505ddcfb
chunk uuid 78d753d0-eeb7-4c3e-b825-b6c2c5de5c7a
item 0 key (0 QGROUP_STATUS 0) itemoff 16251 itemsize 32
version 1 generation 13 flags ON scan 213827585
item 1 key (0 QGROUP_INFO 0/5) itemoff 16211 itemsize 40
generation 11
referenced 170999808 referenced_compressed 170999808
exclusive 170999808 exclusive_compressed 170999808
item 2 key (0 QGROUP_LIMIT 0/5) itemoff 16171 itemsize 40
flags 0
max_referenced 0 max_exclusive 0
rsv_referenced 0 rsv_exclusive 0
total bytes 26843545600
bytes used 171769856
uuid d07f6028-0ae7-40d4-ac45-01a4505ddcfb

> 
> Thanks,
> Qu> 
>>
>> My observation is that this commit changed to call initial quota rescan
>> when quota is enabeld instead of first com

Re: Enabling quota may not correctly rescan on 4.17

2018-06-27 Thread Misono Tomohiro
On 2018/06/27 17:04, Nikolay Borisov wrote:
> 
> 
> On 27.06.2018 10:55, Misono Tomohiro wrote:
>> On 2018/06/27 16:40, Nikolay Borisov wrote:
>>>
>>>
>>> On 26.06.2018 09:00, Misono Tomohiro wrote:
>>>> Hello Nikolay,
>>>>
>>>> I noticed that commit 5d23515be669 ("btrfs: Move qgroup rescan
>>>> on quota enable to btrfs_quota_enable") in 4.17 sometimes causes
>>>> to fail correctly rescanning quota when quota is enabled.
>>>>
>>>> Simple reproducer:
>>>>
>>>> $ mkfs.btrfs -f $DEV
>>>> $ mount $DEV /mnt
>>>> $ dd if=/dev/urandom of=/mnt/file bs=1000 count=1000
>>>> $ btrfs quota enable /mnt
>>>> $ umount /mnt
>>>> $ btrfs check $DEV
>>>> ...
>>>> checking quota groups
>>>> Counts for qgroup id: 0/5 are different
>>>> our:referenced 1019904 referenced compressed 1019904
>>>> disk:   referenced 16384 referenced compressed 16384
>>>> diff:   referenced 1003520 referenced compressed 1003520
>>>> our:exclusive 1019904 exclusive compressed 1019904
>>>> disk:   exclusive 16384 exclusive compressed 16384
>>>> diff:   exclusive 1003520 exclusive compressed 1003520
>>>> found 1413120 bytes used, error(s) found
>>>> ...
>>>
>>> I ran your script 100 times with progs 4.17 and 4.18-rc1 and didn't
>>> observe this error. I didn't observe btrfs/114 also failing but I ran it
>>> a lot less. Is there anything else i can do to make your small
>>> reproducer more likely to trigger?
>>
>> How about btrfs/114? I saw the problem in it first (progs 4.17/kernel 
>> 4.18-rc2)
>> and it seems to always happen in my environment.
> 
> So far nothing, I'm using David's github/misc-next branch, and latest
> commit is: 5330a89b3ee3.
> 
> My mount options are:
> 
> MOUNT_OPTIONS -- -o enospc_debug -o space_cache=v2 /dev/vdc /media/scratch

I can see the failure with or without options...
maybe it depends on machine spec?

> 
>>
>>>
>>>>
>>>> This can be also observed in btrfs/114. (Note that progs < 4.17
>>>> returns error code 0 even if quota is not consistency and therefore
>>>> test will incorrectly pass.)
>>>>
>>>> My observation is that this commit changed the initial quota rescan to be
>>>> called when quota is enabled instead of at the first transaction commit after
>>>> enabling quota, and therefore if there is something not committed at that
>>>> time, its usage will not be accounted.
>>>>
>>>> Actually this can be simply fixed by calling "btrfs rescan" again or
>>>> calling "btrfs fi sync" before "btrfs quota enable".
>>>>
>>>> I think the commit itself makes the code much easier to read, so it may
>>>> be better to fix the problem in progs (i.e. calling sync before quota 
>>>> enable).
>>>>
>>>> Do you have any thoughts?
>>>>
>>>> Thanks,
>>>> Tomohiro Misono
>>>>
>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>>>> the body of a message to majord...@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>>> the body of a message to majord...@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Enabling quota may not correctly rescan on 4.17

2018-06-27 Thread Misono Tomohiro
On 2018/06/27 16:40, Nikolay Borisov wrote:
> 
> 
> On 26.06.2018 09:00, Misono Tomohiro wrote:
>> Hello Nikolay,
>>
>> I noticed that commit 5d23515be669 ("btrfs: Move qgroup rescan
>> on quota enable to btrfs_quota_enable") in 4.17 sometimes causes
>> to fail correctly rescanning quota when quota is enabled.
>>
>> Simple reproducer:
>>
>> $ mkfs.btrfs -f $DEV
>> $ mount $DEV /mnt
>> $ dd if=/dev/urandom of=/mnt/file bs=1000 count=1000
>> $ btrfs quota enbale /mnt
>> $ umount /mnt
>> $ btrfs check $DEV
>> ...
>> checking quota groups
>> Counts for qgroup id: 0/5 are different
>> our:referenced 1019904 referenced compressed 1019904
>> disk:   referenced 16384 referenced compressed 16384
>> diff:   referenced 1003520 referenced compressed 1003520
>> our:exclusive 1019904 exclusive compressed 1019904
>> disk:   exclusive 16384 exclusive compressed 16384
>> diff:   exclusive 1003520 exclusive compressed 1003520
>> found 1413120 bytes used, error(s) found
>> ...
> 
> I ran your script 100 times with progs 4.17 and 4.18-rc1 and didn't
> observe this error. I didn't observe btrfs/114 also failing but I ran it
> a lot less. Is there anything else i can do to make your small
> reproducer more likely to trigger?

How about btrfs/114? I saw the problem in it first (progs 4.17/kernel 4.18-rc2)
and it seems to always happen in my environment.

> 
>>
>> This can be also observed in btrfs/114. (Note that progs < 4.17
>> returns error code 0 even if quota is not consistency and therefore
>> test will incorrectly pass.)
>>
>> My observation is that this commit changed the initial quota rescan to be
>> called when quota is enabled instead of at the first transaction commit after
>> enabling quota, and therefore if there is something not committed at that
>> time, its usage will not be accounted.
>>
>> Actually this can be simply fixed by calling "btrfs rescan" again or
>> calling "btrfs fi sync" before "btrfs quota enable".
>>
>> I think the commit itself makes the code much easier to read, so it may
>> be better to fix the problem in progs (i.e. calling sync before quota 
>> enable).
>>
>> Do you have any thoughts?
>>
>> Thanks,
>> Tomohiro Misono
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: qgroups: Move transaction managed inside btrfs_quota_enable

2018-06-26 Thread Misono Tomohiro
On 2018/06/26 16:09, Nikolay Borisov wrote:
> Commit 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to
> btrfs_quota_enable") not only resulted in an easier to follow code but
> it also introduced a subtle bug. It changed the timing when the initial
> transaction rescan was happening - before the commit it would happen
> after transaction commit had occurred but after the commit it might happen
> before the transaction was committed. This results in failure to
> correctly rescan the quota since there could be data which is still not
> committed on disk.
> 
> This patch aims to fix this by moving the transaction creation/commit
> inside btrfs_quota_enable, which allows to schedule the quota commit
> after the transaction has been committed.
> 
> Fixes: 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to 
> btrfs_quota_enable")
> Reported-by: Misono Tomohiro 
> Signed-off-by: Nikolay Borisov 
> ---
> Hi Misono, 
> 
> Care to test the following patch ? If you say it's ok I will do a similar one 
> for the btrfs_quota_disable function. This will also allow me to get rid of 
> the extra err variable in btrfs_ioctl_quota_ctl. Additionally I think the 
> number of blocks (2) passed to the transaction for reservation might be 
> wrong. 

Hi,

The patch does not remove start_transaction() from btrfs_ioctl_quota_ctl(),
so this does not work, but I understand your approach (continued below).

> 
>  fs/btrfs/ioctl.c  |  2 +-
>  fs/btrfs/qgroup.c | 17 ++---
>  fs/btrfs/qgroup.h |  3 +--
>  3 files changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index a399750b9e41..bf99d7aae3ae 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -5161,7 +5161,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, 
> void __user *arg)
>  
>   switch (sa->cmd) {
>   case BTRFS_QUOTA_CTL_ENABLE:
> - ret = btrfs_quota_enable(trans, fs_info);
> + ret = btrfs_quota_enable(fs_info);
>   break;
>   case BTRFS_QUOTA_CTL_DISABLE:
>   ret = btrfs_quota_disable(trans, fs_info);
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index 1874a6d2e6f5..91bb7e97c0d0 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -875,8 +875,7 @@ static int btrfs_clean_quota_tree(struct 
> btrfs_trans_handle *trans,
>   return ret;
>  }
>  
> -int btrfs_quota_enable(struct btrfs_trans_handle *trans,
> -struct btrfs_fs_info *fs_info)
> +int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
>  {
>   struct btrfs_root *quota_root;
>   struct btrfs_root *tree_root = fs_info->tree_root;
> @@ -886,6 +885,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>   struct btrfs_key key;
>   struct btrfs_key found_key;
>   struct btrfs_qgroup *qgroup = NULL;
> + struct btrfs_trans_handle *trans = NULL;
>   int ret = 0;
>   int slot;
>  
> @@ -893,6 +893,12 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>   if (fs_info->quota_root)
>   goto out;
>  
> + trans = btrfs_start_transaction(tree_root, 2);

(Should we use fs_root for quota?)

> + if (IS_ERR(trans)) {
> + ret = PTR_ERR(trans);
> + goto out;
> + }
> +
>   fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
>   if (!fs_info->qgroup_ulist) {
>   ret = -ENOMEM;
> @@ -987,6 +993,11 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>   fs_info->quota_root = quota_root;
>   set_bit(BTRFS_FS_QUOTA_ENABLED, _info->flags);
>   spin_unlock(_info->qgroup_lock);
> +
> + ret = btrfs_commit_transaction(trans);
> + if (ret)
> + goto out_free_path;
> +
>   ret = qgroup_rescan_init(fs_info, 0, 1);

However, I'm not sure this approach works completely when some files are
concurrently written while quota is being enabled.
Before commit 5d23515be669, qgroup_rescan_init() was called during transaction
commit, but now qgroup_rescan_init() is called outside of a transaction.
So, is there still a slight possibility that the same problem occurs here?

(I don't completely understand how quota works yet, so correct me if I'm
wrong.)

>   if (!ret) {
>   qgroup_rescan_zero_tracking(fs_info);
> @@ -3061,7 +3072,7 @@ static int __btrfs_qgroup_release_data(struct inode 
> *inode,
>   if (free && reserved)
>   return qgroup_free_reserved_data(inode, reserved, start, len);
>   extent_changeset_init();
> - ret = clear_record_extent_bits(_I(inode)->io_tree, start, 
> + ret = clear_record

Enabling quota may not correctly rescan on 4.17

2018-06-26 Thread Misono Tomohiro
Hello Nikolay,

I noticed that commit 5d23515be669 ("btrfs: Move qgroup rescan
on quota enable to btrfs_quota_enable") in 4.17 sometimes causes
the quota rescan to fail to run correctly when quota is enabled.

Simple reproducer:

$ mkfs.btrfs -f $DEV
$ mount $DEV /mnt
$ dd if=/dev/urandom of=/mnt/file bs=1000 count=1000
$ btrfs quota enable /mnt
$ umount /mnt
$ btrfs check $DEV
...
checking quota groups
Counts for qgroup id: 0/5 are different
our:referenced 1019904 referenced compressed 1019904
disk:   referenced 16384 referenced compressed 16384
diff:   referenced 1003520 referenced compressed 1003520
our:exclusive 1019904 exclusive compressed 1019904
disk:   exclusive 16384 exclusive compressed 16384
diff:   exclusive 1003520 exclusive compressed 1003520
found 1413120 bytes used, error(s) found
...

This can also be observed in btrfs/114. (Note that progs < 4.17
returns error code 0 even if quota is not consistent, and therefore
the test will incorrectly pass.)

My observation is that this commit changed the initial quota rescan to be
called when quota is enabled instead of at the first transaction commit after
enabling quota, and therefore if there is something not committed at that
time, its usage will not be accounted.

Actually this can be simply fixed by calling "btrfs rescan" again or
calling "btrfs fi sync" before "btrfs quota enable".

I think the commit itself makes the code much easier to read, so it may
be better to fix the problem in progs (i.e. calling sync before quota enable).

Do you have any thoughts?

Thanks,
Tomohiro Misono


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: Use iocb to derive pos instead of passing a separate parameter

2018-06-25 Thread Misono Tomohiro
On 2018/06/26 1:20, David Sterba wrote:
> On Mon, Jun 25, 2018 at 01:58:58PM +0900, Misono Tomohiro wrote:
>> So, this is the updated version of 
>> https://patchwork.kernel.org/patch/10063039/
>>
>> This time xfstest is ok and
>>  Reviewed-by: Misono Tomohiro 
> 
> Your comment about invalidate_mapping_pages is also ok, right? As
> filemap_fdatawait_range and invalidate_mapping_pages use the same
> start/end of the range.
> 

This time the local variable 'pos' keeps the same value as before and
invalidate_mapping_pages() uses it, so it should be ok.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: Use iocb to derive pos instead of passing a separate parameter

2018-06-24 Thread Misono Tomohiro
So, this is the updated version of https://patchwork.kernel.org/patch/10063039/

This time xfstest is ok and
 Reviewed-by: Misono Tomohiro 

On 2018/06/18 2:39, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues 
> 
> struct kiocb carries the ki_pos, so there is no need to pass it as
> a separate function parameter.
> 
> generic_file_direct_write() increments ki_pos, so we now assign pos
> after the function.
> 
> Signed-off-by: Goldwyn Rodrigues 
> ---
>  fs/btrfs/file.c | 15 ---
>  1 file changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index f660ba1e5e58..f84100a60cec 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -1569,10 +1569,11 @@ static noinline int check_can_nocow(struct 
> btrfs_inode *inode, loff_t pos,
>   return ret;
>  }
>  
> -static noinline ssize_t __btrfs_buffered_write(struct file *file,
> -struct iov_iter *i,
> -loff_t pos)
> +static noinline ssize_t __btrfs_buffered_write(struct kiocb *iocb,
> +struct iov_iter *i)
>  {
> + struct file *file = iocb->ki_filp;
> + loff_t pos = iocb->ki_pos;
>   struct inode *inode = file_inode(file);
>   struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>   struct btrfs_root *root = BTRFS_I(inode)->root;
> @@ -1804,7 +1805,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, 
> struct iov_iter *from)
>  {
>   struct file *file = iocb->ki_filp;
>   struct inode *inode = file_inode(file);
> - loff_t pos = iocb->ki_pos;
> + loff_t pos;
>   ssize_t written;
>   ssize_t written_buffered;
>   loff_t endbyte;
> @@ -1815,8 +1816,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, 
> struct iov_iter *from)
>   if (written < 0 || !iov_iter_count(from))
>   return written;
>  
> - pos += written;
> - written_buffered = __btrfs_buffered_write(file, from, pos);
> + pos = iocb->ki_pos;
> + written_buffered = __btrfs_buffered_write(iocb, from);
>   if (written_buffered < 0) {
>   err = written_buffered;
>   goto out;
> @@ -1953,7 +1954,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
>   if (iocb->ki_flags & IOCB_DIRECT) {
>   num_written = __btrfs_direct_write(iocb, from);
>   } else {
> - num_written = __btrfs_buffered_write(file, from, pos);
> + num_written = __btrfs_buffered_write(iocb, from);
>   if (num_written > 0)
>   iocb->ki_pos = pos + num_written;
>   if (clean_page)
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 03/20] btrfs-progs: libbtrfsutil: Factor out btrfs_util_subvolume_info_fd()

2018-06-18 Thread Misono Tomohiro
Factor out the main logic of btrfs_util_subvolume_info_fd().
This is preparatory work for relaxing the root privilege requirement of this
function.

No functional change.

Signed-off-by: Misono Tomohiro 
---
 libbtrfsutil/subvolume.c | 45 ++---
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/libbtrfsutil/subvolume.c b/libbtrfsutil/subvolume.c
index 867b3e10..0d7ef5bf 100644
--- a/libbtrfsutil/subvolume.c
+++ b/libbtrfsutil/subvolume.c
@@ -295,8 +295,8 @@ PUBLIC enum btrfs_util_error 
btrfs_util_subvolume_info(const char *path,
return err;
 }
 
-PUBLIC enum btrfs_util_error btrfs_util_subvolume_info_fd(int fd, uint64_t id,
- struct 
btrfs_util_subvolume_info *subvol)
+static enum btrfs_util_error get_subvolume_info_root(int fd, uint64_t id,
+struct 
btrfs_util_subvolume_info *subvol)
 {
struct btrfs_ioctl_search_args search = {
.key = {
@@ -310,27 +310,10 @@ PUBLIC enum btrfs_util_error 
btrfs_util_subvolume_info_fd(int fd, uint64_t id,
.nr_items = 0,
},
};
-   enum btrfs_util_error err;
size_t items_pos = 0, buf_off = 0;
bool need_root_item = true, need_root_backref = true;
int ret;
 
-   if (id == 0) {
-   err = btrfs_util_is_subvolume_fd(fd);
-   if (err)
-   return err;
-
-   err = btrfs_util_subvolume_id_fd(fd, );
-   if (err)
-   return err;
-   }
-
-   if ((id < BTRFS_FIRST_FREE_OBJECTID && id != BTRFS_FS_TREE_OBJECTID) ||
-   id > BTRFS_LAST_FREE_OBJECTID) {
-   errno = ENOENT;
-   return BTRFS_UTIL_ERROR_SUBVOLUME_NOT_FOUND;
-   }
-
search.key.min_objectid = search.key.max_objectid = id;
 
if (subvol) {
@@ -400,6 +383,30 @@ PUBLIC enum btrfs_util_error 
btrfs_util_subvolume_info_fd(int fd, uint64_t id,
return BTRFS_UTIL_OK;
 }
 
+PUBLIC enum btrfs_util_error btrfs_util_subvolume_info_fd(int fd, uint64_t id,
+ struct 
btrfs_util_subvolume_info *subvol)
+{
+   enum btrfs_util_error err;
+
+   if (id == 0) {
+   err = btrfs_util_is_subvolume_fd(fd);
+   if (err)
+   return err;
+
+   err = btrfs_util_subvolume_id_fd(fd, );
+   if (err)
+   return err;
+   }
+
+   if ((id < BTRFS_FIRST_FREE_OBJECTID && id != BTRFS_FS_TREE_OBJECTID) ||
+   id > BTRFS_LAST_FREE_OBJECTID) {
+   errno = ENOENT;
+   return BTRFS_UTIL_ERROR_SUBVOLUME_NOT_FOUND;
+   }
+
+   return get_subvolume_info_root(fd, id, subvol);
+}
+
 PUBLIC enum btrfs_util_error btrfs_util_get_subvolume_read_only_fd(int fd,
   bool 
*read_only_ret)
 {
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 10/20] btrfs-progs: sub list: Add -A option to output path in absolute path

2018-06-18 Thread Misono Tomohiro
By default, the printed path is relative to the specified path.
Add the -A option to print the absolute path instead.

[Example]
 $ mkfs.btrfs -f $DEV
 $ mount $DEV /mnt

 $ btrfs subvolume create /mnt/AAA

 $ btrfs subvolume list /mnt
 ID 256 gen 6 top level 5 path AAA

 $ btrfs subvolume list -A /mnt
 ID 256 gen 6 top level 5 path /mnt/AAA

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-subvolume.asciidoc |  2 +
 cmds-subvolume.c   | 73 +-
 2 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index 99fff977..fec4b769 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -148,6 +148,8 @@ list deleted subvolumes that are not yet cleaned.
 Other;;
 -t
 print the result as a table.
+-A
+print path in absolute path.
 
 Sorting;;
 -G [+|-]
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 23596c17..ea341d50 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -1156,7 +1156,8 @@ static void get_subvols_info(struct subvol_list **subvols,
 struct btrfs_list_filter_set_v2 *filter_set,
 int fd,
 int tree_id,
-size_t *capacity)
+size_t *capacity,
+const char *prefix)
 {
struct btrfs_util_subvolume_iterator *iter;
enum btrfs_util_error err;
@@ -1206,7 +1207,10 @@ static void get_subvols_info(struct subvol_list 
**subvols,
goto out;
}
 
-   subvol.path = strdup(".");
+   if (prefix)
+   subvol.path = strdup(prefix);
+   else
+   subvol.path = strdup(".");
if (!filters_match(, filter_set)) {
free(subvol.path);
} else {
@@ -1231,6 +1235,29 @@ skip:
goto out;
}
 
+   if (prefix) {
+   char *temp = subvol.path;
+
+   subvol.path = malloc(strlen(prefix) +
+strlen(subvol.path) + 2);
+   if (!subvol.path) {
+   error("out of memory");
+   subvol.path = temp;
+   ret = -1;
+   goto out;
+   }
+
+   strcpy(subvol.path, prefix);
+   if (strlen(prefix) == 1) {
+   strcpy(subvol.path + 1, temp);
+   } else {
+   subvol.path[strlen(prefix)] = '/';
+   strcpy(subvol.path + strlen(prefix) + 1, temp);
+   }
+
+   free(temp);
+   }
+
if (!filters_match(, filter_set)) {
free(subvol.path);
} else {
@@ -1252,6 +1279,7 @@ out:
 
 static struct subvol_list *btrfs_list_subvols(int fd,
  int is_list_all,
+ int absolute_path,
  const char *path,
  struct btrfs_list_filter_set_v2 
*filter_set)
 {
@@ -1265,11 +1293,24 @@ static struct subvol_list *btrfs_list_subvols(int fd,
}
subvols->num = 0;
 
-   if (is_list_all)
+   if (is_list_all) {
get_subvols_info(, filter_set, fd,
-   BTRFS_FS_TREE_OBJECTID, );
-   else
-   get_subvols_info(, filter_set, fd, 0, );
+   BTRFS_FS_TREE_OBJECTID, , NULL);
+   } else {
+   char *fullpath;
+
+   fullpath = realpath(path, NULL);
+   if (!fullpath) {
+   error("cannot find real path for '%s': %m", path);
+   free_subvol_list(subvols);
+   return NULL;
+   }
+
+   get_subvols_info(, filter_set, fd, 0, ,
+   (absolute_path ? fullpath : NULL));
+
+   free(fullpath);
+   }
 
return subvols;
 }
@@ -1279,6 +1320,7 @@ static int btrfs_list_subvols_print_v2(int fd,
struct btrfs_list_comparer_set_v2 *comp_set,
enum btrfs_list_layout layout,
int is_list_all,
+   int absolute_path,
const char *path,
const char *raw_prefix)
 {
@@ -1287,7 +1329,8 @@ static int btrfs_list_subvols_print_v2(int fd,
if (filter

[PATCH v2 00/20] btrfs-progs: Rework of "subvolume list/show" and relax the root privileges of them

2018-06-18 Thread Misono Tomohiro
f this.)

 8th-15th patch update the behavior of "sub list"

 16th-17th patch update the behavior of "sub show"

 18th-20th patch are cli-test for "sub list" of new behavior.


* Future todo:
If this approach is ok, I'd like to update the output of "sub list" more like:
  - Remove obsolete field (i.e. top-level) from output

Any comments are welcome.
Thanks,
Tomohiro Misono

[1] https://www.spinics.net/lists/linux-btrfs/msg74917.html 

Misono Tomohiro (20):
  btrfs-progs: doc: Update man btrfs subvolume
  btrfs-progs: ioctl/libbtrfsutil: Add 3 definitions of new unprivileged
ioctl
  btrfs-progs: libbtrfsutil: Factor out btrfs_util_subvolume_info_fd()
  btrfs-porgs: libbtrfsutil: Relax the privileges of
util_subvolume_info()
  btrfs-progs: libbtrfsuitl: Factor out
btrfs_util_subvolume_iterator_next()
  btrfs-progs: libbtrfsutil: Relax the privileges of subvolume iterator
  btrfs-progs: sub list: Use libbtrfsuitl for subvolume list
  btrfs-progs: sub list: factor out main part of btrfs_list_subvols
  btrfs-progs: sub list: Change the default behavior of "subvolume list"
and allow non-privileged user to call it
  btrfs-progs: sub list: Add -A option to output path in absolute path
  btrfs-progs: sub list: Add -f option to follow mounted subvolumes
below the path
  btrfs-progs: sub list: Add --nosort option to output incrementally
without sort
  btrfs-progs: sub list: Update -a option and remove meaningless filter
  btrfs-progs: sub list: Update help message of -o option
  btrfs-progs: sub list: Update help message of -d option
  btrfs-progs: utils: Fallback to open without O_NOATIME flag in
find_mount_root():
  btrfs-progs: sub show: Allow non-privileged user to call "subvolume
show"
  btrfs-progs: test: Add helper function to check if test user exists
  btrfs-porgs: test: Add cli-test/009 to check subvolume list for both
root and normal user
  btrfs-progs: test: Add cli-test/010 to check "subvolume list -f"
option

 Documentation/btrfs-subvolume.asciidoc|  108 +-
 cmds-subvolume.c  | 1372 -
 ioctl.h   |   99 ++
 libbtrfsutil/btrfs.h  |   97 ++
 libbtrfsutil/btrfsutil.h  |   25 +-
 libbtrfsutil/errors.c |   10 +
 libbtrfsutil/subvolume.c  |  494 +++-
 tests/cli-tests/009-subvolume-list/test.sh|  134 ++
 tests/cli-tests/010-subvolume-list-follow/test.sh |   86 ++
 tests/common  |   10 +
 utils.c   |3 +
 11 files changed, 2315 insertions(+), 123 deletions(-)
 create mode 100755 tests/cli-tests/009-subvolume-list/test.sh
 create mode 100755 tests/cli-tests/010-subvolume-list-follow/test.sh

-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 04/20] btrfs-porgs: libbtrfsutil: Relax the privileges of util_subvolume_info()

2018-06-18 Thread Misono Tomohiro
This commit relaxes the privileges required by util_subvolume_info() if the
kernel supports the new ioctl (BTRFS_IOC_GET_SUBVOL_INFO) and @id is zero
(i.e. when getting the information of the given path/fd).
For older kernels (< 4.18), the behavior is unchanged.

Signed-off-by: Misono Tomohiro 
---
 libbtrfsutil/btrfsutil.h |  7 +-
 libbtrfsutil/errors.c|  4 
 libbtrfsutil/subvolume.c | 58 
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/libbtrfsutil/btrfsutil.h b/libbtrfsutil/btrfsutil.h
index 6d655f49..3f21d163 100644
--- a/libbtrfsutil/btrfsutil.h
+++ b/libbtrfsutil/btrfsutil.h
@@ -63,6 +63,8 @@ enum btrfs_util_error {
BTRFS_UTIL_ERROR_SYNC_FAILED,
BTRFS_UTIL_ERROR_START_SYNC_FAILED,
BTRFS_UTIL_ERROR_WAIT_SYNC_FAILED,
+   BTRFS_UTIL_ERROR_INVALID_ARGUMENT_FOR_USER,
+   BTRFS_UTIL_ERROR_GET_SUBVOL_INFO_FAILED,
 };
 
 /**
@@ -266,7 +268,10 @@ struct btrfs_util_subvolume_info {
  * to check whether the subvolume exists; %BTRFS_UTIL_ERROR_SUBVOLUME_NOT_FOUND
  * will be returned if it does not.
  *
- * This requires appropriate privilege (CAP_SYS_ADMIN).
+ * This requires appropriate privilege (CAP_SYS_ADMIN) for kernel < 4.18.
+ * From kernel >= 4.18 which supports BTRFS_IOC_GET_SUGBVOL_INFO,
+ * non-privileged user with appropriate permission for @path can use this too
+ * (in that case @id must be zero).
  *
  * Return: %BTRFS_UTIL_OK on success, non-zero error code on failure.
  */
diff --git a/libbtrfsutil/errors.c b/libbtrfsutil/errors.c
index 634edc65..f196fa71 100644
--- a/libbtrfsutil/errors.c
+++ b/libbtrfsutil/errors.c
@@ -45,6 +45,10 @@ static const char * const error_messages[] = {
[BTRFS_UTIL_ERROR_SYNC_FAILED] = "Could not sync filesystem",
[BTRFS_UTIL_ERROR_START_SYNC_FAILED] = "Could not start filesystem 
sync",
[BTRFS_UTIL_ERROR_WAIT_SYNC_FAILED] = "Could not wait for filesystem 
sync",
+   [BTRFS_UTIL_ERROR_INVALID_ARGUMENT_FOR_USER] =
+   "Non-root user cannot specify subvolume id",
+   [BTRFS_UTIL_ERROR_GET_SUBVOL_INFO_FAILED] =
+   "Could not get subvolume information by BTRFS_IOC_GET_SUBVOL_INFO",
 };
 
 PUBLIC const char *btrfs_util_strerror(enum btrfs_util_error err)
diff --git a/libbtrfsutil/subvolume.c b/libbtrfsutil/subvolume.c
index 0d7ef5bf..e94c7079 100644
--- a/libbtrfsutil/subvolume.c
+++ b/libbtrfsutil/subvolume.c
@@ -31,6 +31,14 @@
 
 #include "btrfsutil_internal.h"
 
+static bool is_root(void)
+{
+   uid_t uid;
+
+   uid = geteuid();
+   return (uid == 0);
+}
+
 /*
  * This intentionally duplicates btrfs_util_is_subvolume_fd() instead of 
opening
  * a file descriptor and calling it, because fstat() and fstatfs() don't accept
@@ -383,11 +391,61 @@ static enum btrfs_util_error get_subvolume_info_root(int 
fd, uint64_t id,
return BTRFS_UTIL_OK;
 }
 
+static enum btrfs_util_error get_subvolume_info_user(int fd,
+struct 
btrfs_util_subvolume_info *subvol)
+{
+   struct btrfs_ioctl_get_subvol_info_args info;
+   int ret;
+
+   ret = ioctl(fd, BTRFS_IOC_GET_SUBVOL_INFO, );
+   if (ret < 0)
+   return BTRFS_UTIL_ERROR_GET_SUBVOL_INFO_FAILED;
+
+   subvol->id = info.treeid;
+   subvol->parent_id = info.parent_id;
+   subvol->dir_id = info.dirid;
+   subvol->flags = info.flags;
+   subvol->generation = info.generation;
+
+   memcpy(subvol->uuid, info.uuid, sizeof(subvol->uuid));
+   memcpy(subvol->parent_uuid, info.parent_uuid,
+   sizeof(subvol->parent_uuid));
+   memcpy(subvol->received_uuid, info.received_uuid,
+   sizeof(subvol->received_uuid));
+
+   subvol->ctransid = info.ctransid;
+   subvol->otransid = info.otransid;
+   subvol->stransid = info.stransid;
+   subvol->rtransid = info.rtransid;
+
+   subvol->ctime.tv_sec  = info.ctime.sec;
+   subvol->ctime.tv_nsec = info.ctime.nsec;
+   subvol->otime.tv_sec  = info.otime.sec;
+   subvol->otime.tv_nsec = info.otime.nsec;
+   subvol->stime.tv_sec  = info.stime.sec;
+   subvol->stime.tv_nsec = info.stime.nsec;
+   subvol->rtime.tv_sec  = info.rtime.sec;
+   subvol->rtime.tv_nsec = info.rtime.nsec;
+
+   return BTRFS_UTIL_OK;
+}
+
 PUBLIC enum btrfs_util_error btrfs_util_subvolume_info_fd(int fd, uint64_t id,
  struct 
btrfs_util_subvolume_info *subvol)
 {
enum btrfs_util_error err;
 
+   if (!is_root()) {
+   if (id != 0)
+   return BTRFS_UTIL_ERROR_INVALID_ARGUMENT_FOR_USER;
+
+   err = btrfs_util_is_subvolume_fd(fd);
+   if (err)
+   return err;
+
+   return get_subvolume_info_u

[PATCH v2 12/20] btrfs-progs: sub list: Add --nosort option to output incrementally without sort

2018-06-18 Thread Misono Tomohiro
Currently, "subvolume list" loads all the subvolume information into
memory first in order to sort them. This may cause a performance problem
if there are a lot of subvolumes.

This commit adds the --nosort option to output subvolume information
incrementally without sorting, to avoid consuming memory.

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-subvolume.asciidoc |   5 ++
 cmds-subvolume.c   | 140 ++---
 2 files changed, 100 insertions(+), 45 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index b2461398..cd91385a 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -170,6 +170,11 @@ you can add \'\+' or \'-' in front of each items, \'+' 
means ascending,
 +
 for --sort you can combine some items together by \',', just like
 --sort=+ogen,-gen,path,rootid.
++
+--nosort
+Output the results incrementally without sort. This avoids loading all
+subvolume information to memory and can be useful when there is a lot
+of subvolumes.
 
 *set-default* [| ]::
 Set the default subvolume for the (mounted) filesystem.
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 4ebe0377..802f5c5e 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -1036,6 +1036,23 @@ static void print_all_subvol_info_tab_head(void)
}
 }
 
+static void print_one_subvol_info(struct listed_subvol *subvol,
+ enum btrfs_list_layout layout,
+ const char *raw_prefix)
+{
+   switch (layout) {
+   case BTRFS_LIST_LAYOUT_DEFAULT:
+   print_one_subvol_info_default(subvol);
+   break;
+   case BTRFS_LIST_LAYOUT_TABLE:
+   print_one_subvol_info_table(subvol);
+   break;
+   case BTRFS_LIST_LAYOUT_RAW:
+   print_one_subvol_info_raw(subvol, raw_prefix);
+   break;
+   }
+}
+
 static void print_all_subvol_info(struct subvol_list *subvols,
  enum btrfs_list_layout layout,
  const char *raw_prefix)
@@ -1048,17 +1065,7 @@ static void print_all_subvol_info(struct subvol_list 
*subvols,
for (i = 0; i < subvols->num; i++) {
struct listed_subvol *subvol = >subvols[i];
 
-   switch (layout) {
-   case BTRFS_LIST_LAYOUT_DEFAULT:
-   print_one_subvol_info_default(subvol);
-   break;
-   case BTRFS_LIST_LAYOUT_TABLE:
-   print_one_subvol_info_table(subvol);
-   break;
-   case BTRFS_LIST_LAYOUT_RAW:
-   print_one_subvol_info_raw(subvol, raw_prefix);
-   break;
-   }
+   print_one_subvol_info(subvol, layout, raw_prefix);
}
 }
 
@@ -1159,7 +1166,9 @@ static void get_subvols_info(struct subvol_list **subvols,
 int tree_id,
 size_t *capacity,
 const char *prefix,
-int show_top)
+int show_top,
+enum btrfs_list_layout layout,
+const char *raw_prefix)
 {
struct btrfs_util_subvolume_iterator *iter;
enum btrfs_util_error err;
@@ -1216,9 +1225,14 @@ static void get_subvols_info(struct subvol_list 
**subvols,
if (!filters_match(, filter_set)) {
free(subvol.path);
} else {
-   ret = add_subvol(subvols, , capacity);
-   if (ret)
-   goto out;
+   if (*subvols == NULL) {
+   print_one_subvol_info(,
+ layout, raw_prefix);
+   } else {
+   ret = add_subvol(subvols, , capacity);
+   if (ret)
+   goto out;
+   }
}
}
 
@@ -1263,9 +1277,14 @@ skip:
if (!filters_match(, filter_set)) {
free(subvol.path);
} else {
-   ret = add_subvol(subvols, , capacity);
-   if (ret)
-   goto out;
+   if (*subvols == NULL) {
+   print_one_subvol_info(,
+ layout, raw_prefix);
+   } else {
+   ret = add_subvol(subvols, , capacity);
+   if (ret)
+   goto out;
+   }
}
}
 
@@ -12

[PATCH v2 05/20] btrfs-progs: libbtrfsutil: Factor out btrfs_util_subvolume_iterator_next()

2018-06-18 Thread Misono Tomohiro
Factor out the main logic of btrfs_util_subvolume_iterator_next().
This is a preparation work to relax the root privilege of this function.

No functional change happens.

Signed-off-by: Misono Tomohiro 
---
 libbtrfsutil/subvolume.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/libbtrfsutil/subvolume.c b/libbtrfsutil/subvolume.c
index e94c7079..73471d4f 100644
--- a/libbtrfsutil/subvolume.c
+++ b/libbtrfsutil/subvolume.c
@@ -1255,9 +1255,9 @@ static enum btrfs_util_error build_subvol_path(struct 
btrfs_util_subvolume_itera
return BTRFS_UTIL_OK;
 }
 
-PUBLIC enum btrfs_util_error btrfs_util_subvolume_iterator_next(struct 
btrfs_util_subvolume_iterator *iter,
-   char **path_ret,
-   uint64_t 
*id_ret)
+static enum btrfs_util_error subvolume_iterator_next_root(struct 
btrfs_util_subvolume_iterator *iter,
+ char **path_ret,
+ uint64_t *id_ret)
 {
struct search_stack_entry *top;
const struct btrfs_ioctl_search_header *header;
@@ -1331,6 +1331,13 @@ out:
return BTRFS_UTIL_OK;
 }
 
+PUBLIC enum btrfs_util_error btrfs_util_subvolume_iterator_next(struct 
btrfs_util_subvolume_iterator *iter,
+   char **path_ret,
+   uint64_t 
*id_ret)
+{
+   return subvolume_iterator_next_root(iter, path_ret, id_ret);
+}
+
 PUBLIC enum btrfs_util_error btrfs_util_subvolume_iterator_next_info(struct 
btrfs_util_subvolume_iterator *iter,
 char 
**path_ret,
 struct 
btrfs_util_subvolume_info *subvol)
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 13/20] btrfs-progs: sub list: Update -a option and remove meaningless filter

2018-06-18 Thread Misono Tomohiro
Currently, the -a option adds a filter and changes the subvolume path as follows:
  - If a subvolume is a child of the specified path, nothing changes
  - otherwise, adds <FS_TREE> to head

This is rather meaningless, so let's remove this filter.

As a result, the behavior of -a option becomes the same as
default behavior of sub list in progs <= 4.17

[Example]
 $ mkfs.btrfs -f $DEV
 $ mount $DEV /mnt

 $ btrfs subvolume create /mnt/AAA
 $ btrfs subvolume create /mnt/AAA/BBB
 $ btrfs subvolume create /mnt/ZZZ

 $ btrfs subvolume list -a /mnt
 ID 256 gen 9 top level 5 path AAA
 ID 257 gen 9 top level 256 path AAA/BBB
 ID 258 gen 10 top level 5 path ZZZ

 ** output of progs <= 4.17
 $ btrfs subvolume list -a /mnt
 ID 256 gen 9 top level 5 path AAA
 ID 257 gen 9 top level 256 path <FS_TREE>/AAA/BBB
 ID 258 gen 10 top level 5 path ZZZ

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-subvolume.asciidoc |  6 --
 cmds-subvolume.c   | 35 --
 2 files changed, 8 insertions(+), 33 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index cd91385a..20fae1e1 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -118,8 +118,10 @@ Path filtering;;
 -o
 print only subvolumes below specified <path>.
 -a
-print all the subvolumes in the filesystem and distinguish between
-absolute and relative path with respect to the given .
+print all the subvolumes in the filesystem, including subvolumes
+which cannot be accessed from current mount point.
+path to be shown is relative to the top-level subvolume
+(require root privileges).
 -f
 follow mounted subvolumes below <path> recursively and list them too
 (only if it is the same filesystem). If top-level subvolume is mounted
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 802f5c5e..dab266aa 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -759,28 +759,6 @@ static int filter_topid_equal(struct listed_subvol 
*subvol, uint64_t data)
return subvol->info.parent_id == data;
 }
 
-static int filter_full_path(struct listed_subvol *subvol, uint64_t data)
-{
-   /*
-* This implements the same behavior as before the conversion to
-* libbtrfsutil, which is mostly nonsensical.
-*/
-   if (subvol->info.parent_id != data) {
-   char *tmp;
-   int ret;
-
-   ret = asprintf(, "/%s", subvol->path);
-   if (ret == -1) {
-   error("out of memory");
-   exit(1);
-   }
-
-   free(subvol->path);
-   subvol->path = tmp;
-   }
-   return 1;
-}
-
 static int filter_by_parent(struct listed_subvol *subvol, uint64_t data)
 {
return !uuid_compare(subvol->info.parent_uuid,
@@ -798,7 +776,6 @@ static btrfs_list_filter_func_v2 all_filter_funcs[] = {
[BTRFS_LIST_FILTER_CGEN_LESS]   = filter_cgen_less,
[BTRFS_LIST_FILTER_CGEN_EQUAL]  = filter_cgen_equal,
[BTRFS_LIST_FILTER_TOPID_EQUAL] = filter_topid_equal,
-   [BTRFS_LIST_FILTER_FULL_PATH]   = filter_full_path,
[BTRFS_LIST_FILTER_BY_PARENT]   = filter_by_parent,
 };
 
@@ -1574,9 +1551,9 @@ static const char * const cmd_subvol_list_usage[] = {
"",
"Path filtering:",
"-o   print only subvolumes below specified path",
-   "-a   print all the subvolumes in the filesystem and",
-   " distinguish absolute and relative path with respect",
-   " to the given  (require root privileges)",
+   "-a   print all the subvolumes in the filesystem.",
+   " path to be shown is relative to the top-level",
+   " subvolume (require root privileges)",
"-f   follow mounted subvolumes below the specified path",
" and list them too (only if it is the same filesystem)",
"",
@@ -1783,11 +1760,7 @@ static int cmd_subvol_list(int argc, char **argv)
if (ret)
goto out;
 
-   if (is_list_all)
-   btrfs_list_setup_filter_v2(_set,
-   BTRFS_LIST_FILTER_FULL_PATH,
-   top_id);
-   else if (is_only_in_path)
+   if (is_only_in_path)
btrfs_list_setup_filter_v2(_set,
BTRFS_LIST_FILTER_TOPID_EQUAL,
top_id);
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 01/20] btrfs-progs: doc: Update man btrfs subvolume

2018-06-18 Thread Misono Tomohiro
Some information is obsolete and updated as follows:
 - Add missing explanations of some options
 - Remove outdated explanation of "subvolrootid" mount option
 - Reorder/group options of "sub list" to correspond to the help message
 - Add explanation about different meaning of parent in "parent ID/UUID"
 - Fix indent/misspelling
 - Add missing comma

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-subvolume.asciidoc | 77 --
 1 file changed, 55 insertions(+), 22 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index a8c4af4b..f3eb4e26 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -77,13 +77,16 @@ safely stored on the device.
 `Options`
 +
 -c|--commit-after
-wait for transaction commit at the end of the operation
+wait for transaction commit at the end of the operation.
 +
 -C|--commit-each
-wait for transaction commit after deleting each subvolume
+wait for transaction commit after deleting each subvolume.
++
+-v|--verbose
+verbose output of operations.
 
 *find-new*  ::
-List the recently modified files in a subvolume, after  ID.
+List the recently modified files in a subvolume, after  generation.
 
 *get-default* ::
 Get the default subvolume of the filesystem .
@@ -93,40 +96,54 @@ The output format is similar to *subvolume list* command.
 *list* [options] [-G [\+|-]] [-C [+|-]] 
[--sort=rootid,gen,ogen,path] ::
 List the subvolumes present in the filesystem .
 +
-For every subvolume the following information is shown by default. +
-ID  top level  path  +
-where path is the relative path of the subvolume to the top level subvolume.
+For every subvolume the following information is shown by default:
++
+ID  gen  top level  path 
++
+where ID is subvolume's id, gen is an internal counter which is updated
+every transaction, top level is the same as parent subvolume's id, and
+path is the relative path of the subvolume to the top level subvolume.
 The subvolume's ID may be used by the subvolume set-default command,
 or at mount time via the subvolid= option.
-If `-p` is given, then parent  is added to the output between ID
-and top level. The parent's ID may be used at mount time via the
-`subvolrootid=` option.
 +
 `Options`
 +
--p
-print parent ID.
+Path filtering;;
+-o
+print only subvolumes below specified .
 -a
 print all the subvolumes in the filesystem and distinguish between
 absolute and relative path with respect to the given .
+
+Field selection;;
+-p
+print the parent ID
+('parent' here means the subvolume which contains this subvolume).
 -c
 print the ogeneration of the subvolume, aliases: ogen or origin generation.
 -g
-print the generation of the subvolume.
--o
-print only subvolumes below specified .
+print the generation of the subvolume (default).
 -u
 print the UUID of the subvolume.
 -q
-print the parent uuid of subvolumes (and snapshots).
+print the parent UUID of the subvolume
+('parent' here means subvolume of which this subvolume is a snapshot).
 -R
-print the UUID of the sent subvolume, where the subvolume is the result of a 
receive operation
--t
-print the result as a table.
+print the UUID of the sent subvolume, where the subvolume is the result of a 
receive operation.
+
+Type filtering;;
 -s
 only snapshot subvolumes in the filesystem will be listed.
 -r
 only readonly subvolumes in the filesystem will be listed.
+-d
+list deleted subvolumes that are not yet cleaned.
+
+Other;;
+-t
+print the result as a table.
+
+Sorting;;
 -G [+|-]
 list subvolumes in the filesystem that its generation is
 >=, \<= or = value. \'\+' means >= value, \'-' means \<= value, If there is
@@ -144,9 +161,9 @@ for --sort you can combine some items together by \',', 
just like
 
 *set-default* [| ]::
 Set the default subvolume for the (mounted) filesystem.
-
++
 Set the default subvolume for the (mounted) filesystem at . This will 
hide
-the top-level subvolume (ie. the one mounted with 'subvol=/' or 'subvolid=5').
+the top-level subvolume (i.e. the one mounted with 'subvol=/' or 'subvolid=5').
 Takes action on next mount.
 +
 There are two ways how to specify the subvolume, by  or by the 
@@ -154,10 +171,22 @@ path.
 The id can be obtained from *btrfs subvolume list*, *btrfs subvolume show* or
 *btrfs inspect-internal rootid*.
 
-*show* ::
+*show* [options] |::
 Show information of a given subvolume in the .
++
+`Options`
++
+-r|--rootid
+rootid of the subvolume.
+-u|--uuid:::
+UUID of the subvolume.
+
++
+If no option is specified, subvolume information of  is shown,
+otherwise the subvolume information of rootid or UUID in the filesystem
+is shown.
 
-*snapshot* [-r]  |[/]::
+*snapshot* [-r|-i ]  |[/]::
 Create a snapshot of the subvolume  with the
 name  in the  directory.
 +
@@ -168,6 +197,10 @@ If  is not a subvolume, btrfs 

[PATCH v2 02/20] btrfs-progs: ioctl/libbtrfsutil: Add 3 definitions of new unprivileged ioctl

2018-06-18 Thread Misono Tomohiro
Copy and add 3 definitions of new unprivileged ioctl
(BTRFS_IOC_GET_SUBVOL_INFO, BTRFS_IOC_GET_SUBVOL_ROOTREF and
BTRFS_IOC_INO_LOOKUP_USER) from kernel. They will be used to implement
the user version of "btrfs subvolume list/show" etc.

Signed-off-by: Misono Tomohiro 
---
 ioctl.h  | 99 
 libbtrfsutil/btrfs.h | 97 ++
 2 files changed, 196 insertions(+)

diff --git a/ioctl.h b/ioctl.h
index 709e996f..cce55dbd 100644
--- a/ioctl.h
+++ b/ioctl.h
@@ -320,6 +320,22 @@ struct btrfs_ioctl_ino_lookup_args {
 };
 BUILD_ASSERT(sizeof(struct btrfs_ioctl_ino_lookup_args) == 4096);
 
+#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1)
+struct btrfs_ioctl_ino_lookup_user_args {
+   /* in, inode number containing the subvolume of 'subvolid' */
+   __u64 dirid;
+   /* in */
+   __u64 treeid;
+   /* out, name of the subvolume of 'treeid' */
+   char name[BTRFS_VOL_NAME_MAX + 1];
+   /*
+* out, constructed path from the directory with which the ioctl is
+* called to dirid
+*/
+   char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
+};
+BUILD_ASSERT(sizeof(struct btrfs_ioctl_ino_lookup_user_args) == 4096);
+
 struct btrfs_ioctl_search_key {
/* which root are we searching.  0 is the tree of tree roots */
__u64 tree_id;
@@ -672,6 +688,83 @@ BUILD_ASSERT(sizeof(struct btrfs_ioctl_send_args_64) == 
72);
 
 #define BTRFS_IOC_SEND_64_COMPAT_DEFINED 1
 
+/*
+ * Information about a fs tree root.
+ *
+ * All items are filled by the ioctl
+ */
+struct btrfs_ioctl_get_subvol_info_args {
+   /* Id of this subvolume */
+   __u64 treeid;
+
+   /* Name of this subvolume, used to get the real name at mount point */
+   char name[BTRFS_VOL_NAME_MAX + 1];
+
+   /*
+* Id of the subvolume which contains this subvolume.
+* Zero for top-level subvolume or a deleted subvolume.
+*/
+   __u64 parent_id;
+
+   /*
+* Inode number of the directory which contains this subvolume.
+* Zero for top-level subvolume or a deleted subvolume
+*/
+   __u64 dirid;
+
+   /* Latest transaction id of this subvolume */
+   __u64 generation;
+
+   /* Flags of this subvolume */
+   __u64 flags;
+
+   /* UUID of this subvolume */
+   __u8 uuid[BTRFS_UUID_SIZE];
+
+   /*
+* UUID of the subvolume of which this subvolume is a snapshot.
+* All zero for a non-snapshot subvolume.
+*/
+   __u8 parent_uuid[BTRFS_UUID_SIZE];
+
+   /*
+* UUID of the subvolume from which this subvolume was received.
+* All zero for non-received subvolume.
+*/
+   __u8 received_uuid[BTRFS_UUID_SIZE];
+
+   /* Transaction id indicating when change/create/send/receive happened */
+   __u64 ctransid;
+   __u64 otransid;
+   __u64 stransid;
+   __u64 rtransid;
+   /* Time corresponding to c/o/s/rtransid */
+   struct btrfs_ioctl_timespec ctime;
+   struct btrfs_ioctl_timespec otime;
+   struct btrfs_ioctl_timespec stime;
+   struct btrfs_ioctl_timespec rtime;
+
+   /* Must be zero */
+   __u64 reserved[8];
+};
+
+#define BTRFS_MAX_ROOTREF_BUFFER_NUM 255
+struct btrfs_ioctl_get_subvol_rootref_args {
+   /* in/out, minimum id of rootref's treeid to be searched */
+   __u64 min_treeid;
+
+   /* out */
+   struct {
+   __u64 treeid;
+   __u64 dirid;
+   } rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];
+
+   /* out, number of found items */
+   __u8 num_items;
+   __u8 align[7];
+};
+BUILD_ASSERT(sizeof(struct btrfs_ioctl_get_subvol_rootref_args) == 4096);
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
notused,
@@ -828,6 +921,12 @@ static inline char *btrfs_err_str(enum btrfs_err_code 
err_code)
   struct btrfs_ioctl_feature_flags[3])
 #define BTRFS_IOC_RM_DEV_V2_IOW(BTRFS_IOCTL_MAGIC, 58, \
   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_GET_SUBVOL_INFO _IOR(BTRFS_IOCTL_MAGIC, 60, \
+   struct btrfs_ioctl_get_subvol_info_args)
+#define BTRFS_IOC_GET_SUBVOL_ROOTREF _IOWR(BTRFS_IOCTL_MAGIC, 61, \
+   struct btrfs_ioctl_get_subvol_rootref_args)
+#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
+   struct btrfs_ioctl_ino_lookup_user_args)
 #ifdef __cplusplus
 }
 #endif
diff --git a/libbtrfsutil/btrfs.h b/libbtrfsutil/btrfs.h
index c293f6bf..95740de2 100644
--- a/libbtrfsutil/btrfs.h
+++ b/libbtrfsutil/btrfs.h
@@ -421,6 +421,21 @@ struct btrfs_ioctl_ino_lookup_args {
char name[BTRFS_INO_LOOKUP_PATH_MAX];
 };
 
+#define BTRFS_INO_LOOKUP_USER_PATH

[PATCH v2 11/20] btrfs-progs: sub list: Add -f option to follow mounted subvolumes below the path

2018-06-18 Thread Misono Tomohiro
Add -f option to follow mounted subvolumes below the specified path, only
if it is the same filesystem.

[Example]
 $ mkfs.btrfs -f $DEV
 $ mkfs.btrfs -f $DEV2
 $ mount $DEV /mnt

 $ btrfs subvolume create /mnt/AAA
 $ btrfs subvolume create /mnt/BBB
 $ btrfs subvolume create /mnt/CCC
 $ mkdir /mnt/AAA/bbb
 $ mkdir /mnt/AAA/ccc
 $ mkdir /mnt/AAA/other

 $ umount /mnt
 $ mount -o subvol=AAA $DEV /mnt
 $ mount -o subvol=BBB $DEV /mnt/bbb
 $ mount -o subvol=CCC $DEV /mnt/ccc
 $ mount $DEV2 /mnt/other

 $ btrfs subvolume list /mnt
 ID 256 gen 9 top level 5 path .

 $ btrfs subvolume list -f /mnt
 ID 256 gen 9 top level 5 path .
 ID 258 gen 7 top level 5 path bbb
 ID 259 gen 8 top level 5 path ccc

Note that this option lists top-level subvolume if it is mounted in the
way.

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-subvolume.asciidoc |   4 ++
 cmds-subvolume.c   | 116 +++--
 2 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index fec4b769..b2461398 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -120,6 +120,10 @@ print only subvolumes below specified .
 -a
 print all the subvolumes in the filesystem and distinguish between
 absolute and relative path with respect to the given .
+-f
+follow mounted subvolumes below <path> recursively and list them too
+(only if it is the same filesystem). If top-level subvolume is mounted
+in the way, it is also listed.
 
 Field selection;;
 -p
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index ea341d50..4ebe0377 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -1157,7 +1158,8 @@ static void get_subvols_info(struct subvol_list **subvols,
 int fd,
 int tree_id,
 size_t *capacity,
-const char *prefix)
+const char *prefix,
+int show_top)
 {
struct btrfs_util_subvolume_iterator *iter;
enum btrfs_util_error err;
@@ -1195,7 +1197,7 @@ static void get_subvols_info(struct subvol_list **subvols,
ret = -1;
goto out;
}
-   if (id == BTRFS_FS_TREE_OBJECTID) {
+   if (!show_top && id == BTRFS_FS_TREE_OBJECTID) {
/* Skip top level subvolume */
ret = 0;
goto skip;
@@ -1280,6 +1282,7 @@ out:
 static struct subvol_list *btrfs_list_subvols(int fd,
  int is_list_all,
  int absolute_path,
+ int follow_mount,
  const char *path,
  struct btrfs_list_filter_set_v2 
*filter_set)
 {
@@ -1295,7 +1298,8 @@ static struct subvol_list *btrfs_list_subvols(int fd,
 
if (is_list_all) {
get_subvols_info(, filter_set, fd,
-   BTRFS_FS_TREE_OBJECTID, , NULL);
+   BTRFS_FS_TREE_OBJECTID, , NULL,
+   false);
} else {
char *fullpath;
 
@@ -1307,8 +1311,92 @@ static struct subvol_list *btrfs_list_subvols(int fd,
}
 
get_subvols_info(, filter_set, fd, 0, ,
-   (absolute_path ? fullpath : NULL));
+   (absolute_path ? fullpath : NULL), false);
 
+   if (subvols == NULL) {
+   free(fullpath);
+   return NULL;
+   }
+
+   /* Follow mounted subvolumes below @path */
+   if (follow_mount) {
+   struct mntent *mnt;
+   FILE *f;
+   DIR *dirstream;
+   u8 fsid[BTRFS_FSID_SIZE];
+   u8 fsid2[BTRFS_FSID_SIZE];
+   char *c;
+   int fd2;
+   int ret;
+
+   ret = get_fsid(path, fsid, 0);
+   if (ret < 0) {
+   error("failed to get fsid: %m");
+   free(fullpath);
+   free_subvol_list(subvols);
+   return NULL;
+   }
+
+   f = setmntent("/proc/self/mounts", "r");
+   if (f == NULL) {
+   error("failed to read mount entry: %m");
+   free(fullpath);
+   free_subvol_list(su

[PATCH v2 15/20] btrfs-progs: sub list: Update help message of -d option

2018-06-18 Thread Misono Tomohiro
Explicitly states that -d requires root privileges.
Also, update some option handling with regard to -d option.

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-subvolume.asciidoc | 3 ++-
 cmds-subvolume.c   | 8 
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index 0381c92c..2db1d479 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -149,7 +149,8 @@ only snapshot subvolumes in the filesystem will be listed.
 -r
 only readonly subvolumes in the filesystem will be listed.
 -d
-list deleted subvolumes that are not yet cleaned.
+list deleted subvolumes that are not yet cleaned
+(require root privileges).
 
 Other;;
 -t
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 552c6dea..ef39789a 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -1569,6 +1569,7 @@ static const char * const cmd_subvol_list_usage[] = {
"-s   list only snapshots",
"-r   list readonly subvolumes (including snapshots)",
"-d   list deleted subvolumes that are not yet cleaned",
+   " (require root privileges)",
"",
"Other:",
"-t   print the result as a table",
@@ -1744,6 +1745,13 @@ static int cmd_subvol_list(int argc, char **argv)
goto out;
}
 
+   if (filter_set->only_deleted &&
+   (is_list_all || absolute_path || follow_mount)) {
+   ret = -1;
+   error("cannot use -d with -a/f/A option");
+   goto out;
+   }
+
subvol = argv[optind];
fd = btrfs_open_dir(subvol, , 1);
if (fd < 0) {
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 06/20] btrfs-progs: libbtrfsutil: Relax the privileges of subvolume iterator

2018-06-18 Thread Misono Tomohiro
This commit relaxes the privileges of subvolume iterator when the kernel
supports new ioctls (BTRFS_IOC_GET_SUBVOL_ROOTREF and
BTRFS_IOC_INO_LOOKUP_USER).

For older kernel (< 4.18), the behavior is the same.

Non-privileged user can use subvolume iterator only if new ioctls are
supported and btrfs_util_create_subvolume_iterator() is called with @top
zero (i.e. if the iterator is created from given path/fd). In this case:
 - an iterator can be created from non-subvolume directory
 - subvolume will be skipped if
   - it does not exist or has a different id from the subvolume found
     by INO_LOOKUP_USER (may happen if a dir in the path is being mounted)
   - it cannot be opened due to permission error

Note that this commit also allows root user to specify a non-subvolume
directory when @top is zero. Only in that case, root's code path is
the same as user.

Signed-off-by: Misono Tomohiro 
---
 libbtrfsutil/btrfsutil.h |  18 ++-
 libbtrfsutil/errors.c|   6 +
 libbtrfsutil/subvolume.c | 380 +++
 3 files changed, 377 insertions(+), 27 deletions(-)

diff --git a/libbtrfsutil/btrfsutil.h b/libbtrfsutil/btrfsutil.h
index 3f21d163..49454a63 100644
--- a/libbtrfsutil/btrfsutil.h
+++ b/libbtrfsutil/btrfsutil.h
@@ -65,6 +65,10 @@ enum btrfs_util_error {
BTRFS_UTIL_ERROR_WAIT_SYNC_FAILED,
BTRFS_UTIL_ERROR_INVALID_ARGUMENT_FOR_USER,
BTRFS_UTIL_ERROR_GET_SUBVOL_INFO_FAILED,
+   BTRFS_UTIL_ERROR_GET_SUBVOL_ROOTREF_FAILED,
+   BTRFS_UTIL_ERROR_INO_LOOKUP_USER_FAILED,
+   BTRFS_UTIL_ERROR_DUP_FAILED,
+   BTRFS_UTIL_ERROR_CHDIR_FAILED,
 };
 
 /**
@@ -510,6 +514,14 @@ struct btrfs_util_subvolume_iterator;
  * @flags: Bitmask of BTRFS_UTIL_SUBVOLUME_ITERATOR_* flags.
  * @ret: Returned iterator.
  *
+ * Using subvolume iterator requires appropriate privilege (CAP_SYS_ADMIN) for
+ * kernel < 4.18. From kernel >= 4.18 which supports
+ * BTRFS_IOC_GET_SUBVOL_ROOTREF and BTRFS_IOC_INO_LOOKUP_USER, non-privileged
+ * user can use it too (in that case @top must be zero). Also from kernel
+ * >=4.18, if @top is zero, the specified path can be non-subvolume directory
+ * and subvolumes which cannot be accessed will be skipped (either due to
+ * permission error or path is hidden by other mount).
+ *
  * The returned iterator must be freed with
  * btrfs_util_destroy_subvolume_iterator().
  *
@@ -558,7 +570,8 @@ int btrfs_util_subvolume_iterator_fd(const struct 
btrfs_util_subvolume_iterator
  * Must be freed with free().
  * @id_ret: Returned subvolume ID. May be %NULL.
  *
- * This requires appropriate privilege (CAP_SYS_ADMIN).
+ * This requires appropriate privilege (CAP_SYS_ADMIN) for kernel < 4.18.
+ * See the comment of btrfs_util_create_subvolume_iterator()
  *
  * Return: %BTRFS_UTIL_OK on success, %BTRFS_UTIL_ERROR_STOP_ITERATION if there
  * are no more subvolumes, non-zero error code on failure.
@@ -577,7 +590,8 @@ enum btrfs_util_error 
btrfs_util_subvolume_iterator_next(struct btrfs_util_subvo
  * This convenience function basically combines
  * btrfs_util_subvolume_iterator_next() and btrfs_util_subvolume_info().
  *
- * This requires appropriate privilege (CAP_SYS_ADMIN).
+ * This requires appropriate privilege (CAP_SYS_ADMIN) for kernel < 4.18.
+ * See the comment of btrfs_util_create_subvolume_iterator()
  *
  * Return: See btrfs_util_subvolume_iterator_next().
  */
diff --git a/libbtrfsutil/errors.c b/libbtrfsutil/errors.c
index f196fa71..c77407bf 100644
--- a/libbtrfsutil/errors.c
+++ b/libbtrfsutil/errors.c
@@ -49,6 +49,12 @@ static const char * const error_messages[] = {
"Non-root user cannot specify subvolume id",
[BTRFS_UTIL_ERROR_GET_SUBVOL_INFO_FAILED] =
"Could not get subvolume information by BTRFS_IOC_GET_SUBVOL_INFO",
+   [BTRFS_UTIL_ERROR_GET_SUBVOL_ROOTREF_FAILED] =
+   "Could not get rootref information by BTRRFS_IOC_GET_SUBVOL_ROOTREF",
+   [BTRFS_UTIL_ERROR_INO_LOOKUP_USER_FAILED] =
+   "Could not resolve subvolume path by BTRFS_IOC_INO_LOOKUP_USER",
+   [BTRFS_UTIL_ERROR_DUP_FAILED] = "Could not dup",
+   [BTRFS_UTIL_ERROR_CHDIR_FAILED] = "Could not chdir",
 };
 
 PUBLIC const char *btrfs_util_strerror(enum btrfs_util_error err)
diff --git a/libbtrfsutil/subvolume.c b/libbtrfsutil/subvolume.c
index 73471d4f..ae39d5c8 100644
--- a/libbtrfsutil/subvolume.c
+++ b/libbtrfsutil/subvolume.c
@@ -760,13 +760,21 @@ PUBLIC enum btrfs_util_error 
btrfs_util_create_subvolume_fd(int parent_fd,
 #define BTRFS_UTIL_SUBVOLUME_ITERATOR_CLOSE_FD (1 << 30)
 
 struct search_stack_entry {
+   /* below are used for subvolume_iterator_next_user */
+   uint64_t id;
+   struct btrfs_ioctl_get_subvol_rootref_args rootref_args;
+   /* below is used for subvolume_iterator_next_root */
struct btrfs_ioctl_search_args search;
+   /* below are used for both */
size_t item

[PATCH v2 07/20] btrfs-progs: sub list: Use libbtrfsutil for subvolume list

2018-06-18 Thread Misono Tomohiro
This is a copy of the following unmerged patch originally written
by Omar Sandoval:
  btrfs-progs: use libbtrfsutil for subvolume list
except that this commit keeps the libbtrfs implementation which the above
commit tries to remove (therefore this adds the suffix _v2 to structs/functions).

Original Author: Omar Sandoval 
Signed-off-by: Misono Tomohiro 
---
 cmds-subvolume.c | 963 +--
 1 file changed, 936 insertions(+), 27 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 45363a5a..c54a8003 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -404,6 +404,915 @@ keep_fd:
return ret;
 }
 
+#define BTRFS_LIST_NFILTERS_INCREASE   (2 * BTRFS_LIST_FILTER_MAX)
+#define BTRFS_LIST_NCOMPS_INCREASE (2 * BTRFS_LIST_COMP_MAX)
+
+struct listed_subvol {
+   struct btrfs_util_subvolume_info info;
+   char *path;
+};
+
+struct subvol_list {
+   size_t num;
+   struct listed_subvol subvols[];
+};
+
+typedef int (*btrfs_list_filter_func_v2)(struct listed_subvol *, uint64_t);
+typedef int (*btrfs_list_comp_func_v2)(const struct listed_subvol *,
+   const struct listed_subvol *,
+   int);
+
+struct btrfs_list_filter_v2 {
+   btrfs_list_filter_func_v2 filter_func;
+   u64 data;
+};
+
+struct btrfs_list_comparer_v2 {
+   btrfs_list_comp_func_v2 comp_func;
+   int is_descending;
+};
+
+struct btrfs_list_filter_set_v2 {
+   int total;
+   int nfilters;
+   int only_deleted;
+   struct btrfs_list_filter_v2 filters[0];
+};
+
+struct btrfs_list_comparer_set_v2 {
+   int total;
+   int ncomps;
+   struct btrfs_list_comparer_v2 comps[0];
+};
+
+static struct {
+   char*name;
+   char*column_name;
+   int need_print;
+} btrfs_list_columns[] = {
+   {
+   .name   = "ID",
+   .column_name= "ID",
+   .need_print = 0,
+   },
+   {
+   .name   = "gen",
+   .column_name= "Gen",
+   .need_print = 0,
+   },
+   {
+   .name   = "cgen",
+   .column_name= "CGen",
+   .need_print = 0,
+   },
+   {
+   .name   = "parent",
+   .column_name= "Parent",
+   .need_print = 0,
+   },
+   {
+   .name   = "top level",
+   .column_name= "Top Level",
+   .need_print = 0,
+   },
+   {
+   .name   = "otime",
+   .column_name= "OTime",
+   .need_print = 0,
+   },
+   {
+   .name   = "parent_uuid",
+   .column_name= "Parent UUID",
+   .need_print = 0,
+   },
+   {
+   .name   = "received_uuid",
+   .column_name= "Received UUID",
+   .need_print = 0,
+   },
+   {
+   .name   = "uuid",
+   .column_name= "UUID",
+   .need_print = 0,
+   },
+   {
+   .name   = "path",
+   .column_name= "Path",
+   .need_print = 0,
+   },
+   {
+   .name   = NULL,
+   .column_name= NULL,
+   .need_print = 0,
+   },
+};
+
+static btrfs_list_filter_func_v2 all_filter_funcs[];
+static btrfs_list_comp_func_v2 all_comp_funcs[];
+
+static void btrfs_list_setup_print_column_v2(enum btrfs_list_column_enum 
column)
+{
+   int i;
+
+   ASSERT(0 <= column && column <= BTRFS_LIST_ALL);
+
+   if (column < BTRFS_LIST_ALL) {
+   btrfs_list_columns[column].need_print = 1;
+   return;
+   }
+
+   for (i = 0; i < BTRFS_LIST_ALL; i++)
+   btrfs_list_columns[i].need_print = 1;
+}
+
+static int comp_entry_with_rootid_v2(const struct listed_subvol *entry1,
+ const struct listed_subvol *entry2,
+ int is_descending)
+{
+   int ret;
+
+   if (entry1->info.id > entry2->info.id)
+   ret = 1;
+   else if (entry1->info.id < entry2->info.id)
+   ret = -1;
+   else
+   ret = 0;
+
+   return is_descending ? -ret : ret;
+}
+
+static int comp_entry_with_gen_v2(const struct listed_subvol *entry1,
+  const struct listed_subvol *entry2,
+  int is_descending)
+{
+   int ret;
+
+   if (entry1->info.generation > entry2->info.generation)
+   ret = 1;
+

[PATCH v2 17/20] btrfs-progs: sub show: Allow non-privileged user to call "subvolume show"

2018-06-18 Thread Misono Tomohiro
Allow non-privileged user to call subvolume show if new ioctls
(BTRFS_IOC_GET_SUBVOL_INFO/BTRFS_IOC_GET_SUBVOL_ROOTREF,
BTRFS_IOC_INO_LOOKUP_USER, from kernel 4.18) are available.
Non-privileged user still cannot use -r or -u option.

The behavior for root user is the same as before.

There are some output differences between root and user:
  root ... subvolume path is from top-level subvolume
   list all snapshots in the fs (inc. non-accessible ones)
  user ... subvolume path is absolute path
   list snapshots under the mountpoint
   (only to which the user has appropriate access right)

[Example]
 $ sudo mkfs.btrfs -f $DEV
 $ sudo mount $DEV /mnt

 $ sudo btrfs subvolume create /mnt/AAA
 $ sudo btrfs subvolume snapshot /mnt/AAA /mnt/snap1
 $ sudo btrfs subvolume snapshot /mnt/AAA /mnt/AAA/snap2

 $ sudo umount /mnt
 $ sudo mount -o subvol=AAA $DEV /mnt

 # root
 $ sudo btrfs subvolume show /mnt
 AAA
  Name:AAA
  UUID:15e80697-2ffb-0b4b-8e1e-e0873a7cf944
  ...
  Snapshot(s):
   AAA/snap2
   snap1

 # non-privileged user
 $ btrfs subvolume show /mnt
 /mnt
  Name:AAA
  UUID:15e80697-2ffb-0b4b-8e1e-e0873a7cf944
  ...
  Snapshot(s):
   /mnt/snap2

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-subvolume.asciidoc |  11 +++-
 cmds-subvolume.c   | 107 +
 2 files changed, 105 insertions(+), 13 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index 2db1d479..fc57e6bc 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -194,12 +194,19 @@ The id can be obtained from *btrfs subvolume list*, 
*btrfs subvolume show* or
 *show* [options] |::
 Show information of a given subvolume in the .
 +
+This command used to require root privileges. From kernel 4.18,
+non-privileged user can call this unless -r/-u option is used.
+Note that for root, output path is relative to the top-level subvolume
+while absolute path is shown for non-privileged user.
+Also for root, snapshots field lists all the snapshots in the fs while
+only snapshots under mount point are shown for non-privileged user.
++
 `Options`
 +
 -r|--rootid
-rootid of the subvolume.
+rootid of the subvolume (require root privileges).
 -u|--uuid:::
-UUID of the subvolume.
+UUID of the subvolume (require root privileges).
 
 +
 If no option is specified, subvolume information of  is shown,
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index ef39789a..4df418b0 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -2095,8 +2095,12 @@ static int cmd_subvol_find_new(int argc, char **argv)
 static const char * const cmd_subvol_show_usage[] = {
"btrfs subvolume show [options] |",
"Show more information about the subvolume",
-   "-r|--rootid   rootid of the subvolume",
-   "-u|--uuid uuid of the subvolume",
+   "",
+   "This command had required root privileges. From kernel 4.18,",
+   "non-privileged user can call this unless -r/-u option is not used.",
+   "",
+   "-r|--rootid   rootid of the subvolume (require root privileges)",
+   "-u|--uuid uuid of the subvolume   (require root privileges)",
"",
"If no option is specified,  will be shown, otherwise",
"the rootid or uuid are resolved relative to the  path.",
@@ -2109,8 +2113,10 @@ static int cmd_subvol_show(int argc, char **argv)
char uuidparse[BTRFS_UUID_UNPARSED_SIZE];
char *fullpath = NULL;
int fd = -1;
+   int fd_mnt = -1;
int ret = 1;
DIR *dirstream1 = NULL;
+   DIR *dirstream_mnt = NULL;
int by_rootid = 0;
int by_uuid = 0;
u64 rootid_arg = 0;
@@ -2118,7 +2124,10 @@ static int cmd_subvol_show(int argc, char **argv)
struct btrfs_util_subvolume_iterator *iter;
struct btrfs_util_subvolume_info subvol;
char *subvol_path = NULL;
+   char *subvol_name = NULL;
+   char *mount_point = NULL;
enum btrfs_util_error err;
+   bool root;
 
while (1) {
int c;
@@ -2155,6 +2164,12 @@ static int cmd_subvol_show(int argc, char **argv)
usage(cmd_subvol_show_usage);
}
 
+   root = is_root();
+   if (!root && (by_rootid || by_uuid)) {
+   error("only root can use -r or -u options");
+   return -1;
+   }
+
fullpath = realpath(argv[optind], NULL);
if (!fullpath) {
error("cannot find real path for '%s': %m", argv[optind]);
@@ -2209,19 +2224,53 @@ static int cmd_subvol_show(int argc, char **argv)
goto out;
}
 
-   er

[PATCH v2 09/20] btrfs-progs: sub list: Change the default behavior of "subvolume list" and allow non-privileged user to call it

2018-06-18 Thread Misono Tomohiro
Change the default behavior of "subvolume list" and allow non-privileged
user to call it as well.

From this commit, by default it only lists subvolumes under the specified
path (incl. the path itself except top-level subvolume). Also, if kernel
supports new ioctls (BTRFS_IOC_GET_SUBVOL_INFO/BTRFS_IOC_GET_SUBVOL_ROOTREF
and BTRFS_IOC_INO_LOOKUP_USER, which are available from 4.18),
  - the specified path can be non-subvolume directory.
  - non-privileged user can also call it (subvolumes to which the user
does not have access rights will be skipped).

Note that root user can list all the subvolume in the fs with -a option.

[Example]
 $ mkfs.btrfs -f $DEV
 $ mount $DEV /mnt

 $ btrfs subvolume create /mnt/AAA
 $ btrfs subvolume create /mnt/AAA/BBB
 $ mkdir /mnt/AAA/BBB/dir
 $ btrfs subvolume create /mnt/AAA/BBB/dir/CCC
 $ btrfs subvolume create /mnt/ZZZ

 $ umount /mnt
 $ mount -o subvol=AAA $DEV /mnt

 $ btrfs subvolume list /mnt
 ID 256 gen 11 top level 5 path .
 ID 257 gen 8 top level 256 path BBB
 ID 258 gen 8 top level 257 path BBB/dir/CCC

 $ btrfs subvolume list /mnt/BBB
 ID 257 gen 8 top level 256 path .
 ID 258 gen 8 top level 257 path dir/CCC

 $ btrfs subvolume list /mnt/BBB/dir
 ID 258 gen 8 top level 257 path CCC

 ** output of progs <= 4.17
 $ mount -o subvol=AAA $DEV /mnt
 $ btrfs subvolume list /mnt
 ID 256 gen 11 top level 5 path AAA
 ID 257 gen 8 top level 256 path BBB
 ID 258 gen 8 top level 257 path BBB/dir/CCC
 ID 259 gen 11 top level 256 path ZZZ

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-subvolume.asciidoc |   8 +-
 cmds-subvolume.c   | 144 +
 2 files changed, 119 insertions(+), 33 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index f3eb4e26..99fff977 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -95,6 +95,12 @@ The output format is similar to *subvolume list* command.
 
 *list* [options] [-G [\+|-]] [-C [+|-]] 
[--sort=rootid,gen,ogen,path] ::
 List the subvolumes present in the filesystem .
+By default, this only lists the subvolumes under ,
+including  itself (except top-level subvolume).
++
+This command had required root privileges. From kernel 4.18,
+non privileged user can call this too. Also from kernel 4.18,
+It is possible to specify non-subvolume directory as .
 +
 For every subvolume the following information is shown by default:
 +
@@ -102,7 +108,7 @@ ID  gen  top level  path 
 +
 where ID is subvolume's id, gen is an internal counter which is updated
 every transaction, top level is the same as parent subvolume's id, and
-path is the relative path of the subvolume to the top level subvolume.
+path is the relative path of the subvolume to the specified path.
 The subvolume's ID may be used by the subvolume set-default command,
 or at mount time via the subvolid= option.
 +
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 279a6e51..23596c17 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -1125,9 +1125,37 @@ out:
return subvols;
 }
 
+static int add_subvol(struct subvol_list **subvols,
+ struct listed_subvol *subvol,
+ size_t *capacity)
+{
+   if ((*subvols)->num >= *capacity) {
+   struct subvol_list *new_subvols;
+   size_t new_capacity = max_t(size_t, 1, *capacity * 2);
+
+   new_subvols = realloc(*subvols,
+ sizeof(*new_subvols) +
+ new_capacity *
+ sizeof(new_subvols->subvols[0]));
+   if (!new_subvols) {
+   error("out of memory");
+   return -1;
+   }
+
+   *subvols = new_subvols;
+   *capacity = new_capacity;
+   }
+
+   (*subvols)->subvols[(*subvols)->num] = *subvol;
+   (*subvols)->num++;
+
+   return 0;
+}
+
 static void get_subvols_info(struct subvol_list **subvols,
 struct btrfs_list_filter_set_v2 *filter_set,
 int fd,
+int tree_id,
 size_t *capacity)
 {
struct btrfs_util_subvolume_iterator *iter;
@@ -1135,7 +1163,7 @@ static void get_subvols_info(struct subvol_list **subvols,
int ret = -1;
 
err = btrfs_util_create_subvolume_iterator_fd(fd,
- BTRFS_FS_TREE_OBJECTID, 0,
+ tree_id, 0,
  );
if (err) {
iter = NULL;
@@ -1143,6 +1171,52 @@ static void get_subvols_info(struct subvol_list 
**subvols,
goto out;
}
 
+   /*
+* Subvolume iterator does not include the information of the
+ 

[PATCH v2 14/20] btrfs-progs: sub list: Update help message of -o option

2018-06-18 Thread Misono Tomohiro
Currently "sub list -o" lists only child subvolumes of the specified
path. So, update help message and variable name more appropriately.

Signed-off-by: Misono Tomohiro 
---
 Documentation/btrfs-subvolume.asciidoc |  2 +-
 cmds-subvolume.c   | 10 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/btrfs-subvolume.asciidoc 
b/Documentation/btrfs-subvolume.asciidoc
index 20fae1e1..0381c92c 100644
--- a/Documentation/btrfs-subvolume.asciidoc
+++ b/Documentation/btrfs-subvolume.asciidoc
@@ -116,7 +116,7 @@ or at mount time via the subvolid= option.
 +
 Path filtering;;
 -o
-print only subvolumes below specified .
+print only subvolumes which the subvolume of  contains.
 -a
 print all the subvolumes in the filesystem, including subvolumes
 which cannot be accessed from current mount point.
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index dab266aa..552c6dea 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -1550,7 +1550,7 @@ static const char * const cmd_subvol_list_usage[] = {
"It is possible to specify non-subvolume directory as .",
"",
"Path filtering:",
-   "-o   print only subvolumes below specified path",
+   "-o   print only subvolumes which the subvolume of  
contains",
"-a   print all the subvolumes in the filesystem.",
" path to be shown is relative to the top-level",
" subvolume (require root privileges)",
@@ -1605,7 +1605,7 @@ static int cmd_subvol_list(int argc, char **argv)
int follow_mount = 0;
int sort = 0;
int no_sort = 0;
-   int is_only_in_path = 0;
+   int is_only_child = 0;
int absolute_path = 0;
DIR *dirstream = NULL;
enum btrfs_list_layout layout = BTRFS_LIST_LAYOUT_DEFAULT;
@@ -1651,7 +1651,7 @@ static int cmd_subvol_list(int argc, char **argv)
btrfs_list_setup_print_column_v2(BTRFS_LIST_GENERATION);
break;
case 'o':
-   is_only_in_path = 1;
+   is_only_child = 1;
break;
case 't':
layout = BTRFS_LIST_LAYOUT_TABLE;
@@ -1732,7 +1732,7 @@ static int cmd_subvol_list(int argc, char **argv)
goto out;
}
 
-   if (follow_mount && (is_list_all || is_only_in_path)) {
+   if (follow_mount && (is_list_all || is_only_child)) {
ret = -1;
error("cannot use -f with -a or -o option");
goto out;
@@ -1760,7 +1760,7 @@ static int cmd_subvol_list(int argc, char **argv)
if (ret)
goto out;
 
-   if (is_only_in_path)
+   if (is_only_child)
btrfs_list_setup_filter_v2(_set,
BTRFS_LIST_FILTER_TOPID_EQUAL,
top_id);
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 18/20] btrfs-progs: test: Add helper function to check if test user exists

2018-06-18 Thread Misono Tomohiro
Test user 'progs-test' will be used to test the behavior of normal user.

In order to pass this check, add the user by "useradd -M progs-test".
Note that progs-test should not have root privileges.

Signed-off-by: Misono Tomohiro 
---
 tests/common | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/tests/common b/tests/common
index 7e4e09df..7a37a4cd 100644
--- a/tests/common
+++ b/tests/common
@@ -314,6 +314,16 @@ check_global_prereq()
fi
 }
 
+check_testuser()
+{
+   id -u progs-test > /dev/null 2>&1
+   if [ $? -ne 0 ]; then
+   _not_run "Need to add user \"progs-test\""
+   fi
+   # Note that progs-test should not have root privileges
+   # otherwise test may not run as expected
+}
+
 check_image()
 {
local image
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 20/20] btrfs-progs: test: Add cli-test/010 to check "subvolume list -f" option

2018-06-18 Thread Misono Tomohiro
Signed-off-by: Misono Tomohiro 
---
 tests/cli-tests/010-subvolume-list-follow/test.sh | 86 +++
 1 file changed, 86 insertions(+)
 create mode 100755 tests/cli-tests/010-subvolume-list-follow/test.sh

diff --git a/tests/cli-tests/010-subvolume-list-follow/test.sh 
b/tests/cli-tests/010-subvolume-list-follow/test.sh
new file mode 100755
index ..8fb746c6
--- /dev/null
+++ b/tests/cli-tests/010-subvolume-list-follow/test.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# test for "subvolume list -f"
+
+source "$TEST_TOP/common"
+
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+setup_loopdevs 2
+prepare_loopdevs
+dev1=${loopdevs[1]}
+dev2=${loopdevs[2]}
+
+# test if the ids returned by "sub list" match expected ids
+# $1  ... option for subvolume list
+# $2  ... PATH to be specified by sub list command
+# $3~ ... expected return ids
+test_list()
+{
+   result=$(run_check_stdout $SUDO "$TOP/btrfs" subvolume list $1 "$2" | \
+   awk '{print $2}' | xargs | sort -n)
+
+   shift
+   shift
+   expected=($(echo "$@" | tr " " "\n" | sort -n))
+   expected=$(IFS=" "; echo "${expected[*]}")
+
+   if [ "$result" != "$expected" ]; then
+   echo "result  : $result"
+   echo "expected: $expected"
+   _fail "ids returned by sub list does not match expected ids"
+   fi
+}
+
+run_check $SUDO_HELPER "$TOP/mkfs.btrfs" -f "$dev1"
+run_check $SUDO_HELPER "$TOP/mkfs.btrfs" -f "$dev2"
+
+run_check $SUDO_HELPER mount "$dev1" "$TEST_MNT"
+cd "$TEST_MNT"
+
+# Create some subvolumes and directories
+#  (id 5)
+#   |- AAA (id 256)
+#   |   |- top
+#   |   |- bbb
+#   |   -- ccc
+#   |
+#   |- BBB (id 258)
+#   -- CCC (id 259)
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create AAA
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create BBB
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create CCC
+run_check $SUDO_HELPER mkdir AAA/top
+run_check $SUDO_HELPER mkdir AAA/bbb
+run_check $SUDO_HELPER mkdir AAA/ccc
+
+test_list "" "." "256 258 259"
+test_list "-f" "." "256 258 259"
+cd ..
+run_check $SUDO_HELPER umount "$TEST_MNT"
+
+# Mount as follows:
+#
+# "TEST_MNT" (AAA)
+#   |- top (FS_TREE)
+#   |   |- AAA
+#   |   |- BBB
+#   |   -- CCC
+#   |
+#   |- bbb (BBB)
+#   -- ccc (CCC)
+run_check $SUDO_HELPER mount -o subvol=AAA "$dev1" "$TEST_MNT"
+run_check $SUDO_HELPER mount "$dev1" "$TEST_MNT/top"
+run_check $SUDO_HELPER mount -o subvol=BBB "$dev1" "$TEST_MNT/bbb"
+run_check $SUDO_HELPER mount -o subvol=CCC "$dev1" "$TEST_MNT/ccc"
+
+cd "$TEST_MNT"
+test_list "" "." "256"
+# With -f option, subvolume AAA/BBB/CCC will be counted twice.
+# Also, it will list FS_TREE (5) if it is mounted below the specified path.
+test_list "-f" "." "5 256 256 258 258 259 259"
+
+cd ..
+run_check $SUDO_HELPER umount -R "$TEST_MNT"
+cleanup_loopdevs
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 19/20] btrfs-progs: test: Add cli-test/009 to check subvolume list for both root and normal user

2018-06-18 Thread Misono Tomohiro
Signed-off-by: Misono Tomohiro 
---
 tests/cli-tests/009-subvolume-list/test.sh | 134 +
 1 file changed, 134 insertions(+)
 create mode 100755 tests/cli-tests/009-subvolume-list/test.sh

diff --git a/tests/cli-tests/009-subvolume-list/test.sh 
b/tests/cli-tests/009-subvolume-list/test.sh
new file mode 100755
index ..bf6c31d6
--- /dev/null
+++ b/tests/cli-tests/009-subvolume-list/test.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+# test for "subvolume list" both for root and normal user
+
+source "$TEST_TOP/common"
+
+check_testuser
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+prepare_test_dev
+
+
+# test if the ids returned by "sub list" match expected ids
+# $1  ... indicate run as root or test user
+# $2  ... PATH to be specified by sub list command
+# $3~ ... expected return ids
+test_list()
+{
+   local SUDO
+   if [ $1 -eq 1 ]; then
+   SUDO=$SUDO_HELPER
+   else
+   SUDO="sudo -u progs-test"
+   fi
+
+   result=$(run_check_stdout $SUDO "$TOP/btrfs" subvolume list "$2" | \
+   awk '{print $2}' | xargs | sort -n)
+
+   shift
+   shift
+   expected=($(echo "$@" | tr " " "\n" | sort -n))
+   expected=$(IFS=" "; echo "${expected[*]}")
+
+   if [ "$result" != "$expected" ]; then
+   echo "result  : $result"
+   echo "expected: $expected"
+   _fail "ids returned by sub list does not match expected ids"
+   fi
+}
+
+run_check $SUDO_HELPER "$TOP/mkfs.btrfs" -f "$TEST_DEV"
+run_check_mount_test_dev
+cd "$TEST_MNT"
+
+# create subvolumes and directories and make some non-readable
+# by user 'progs-test'
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub1
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub1/subsub1
+run_check $SUDO_HELPER mkdir sub1/dir
+
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub2
+run_check $SUDO_HELPER mkdir -p sub2/dir/dirdir
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub2/dir/subsub2
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub2/dir/dirdir/subsubX
+
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub3
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub3/subsub3
+run_check $SUDO_HELPER mkdir sub3/dir
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub3/dir/subsubY
+run_check $SUDO_HELPER chmod o-r sub3
+
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub4
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub4/subsub4
+run_check $SUDO_HELPER mkdir sub4/dir
+run_check $SUDO_HELPER "$TOP/btrfs" subvolume create sub4/dir/subsubZ
+run_check $SUDO_HELPER setfacl -m u:progs-test:- sub4/dir
+
+run_check $SUDO_HELPER touch "file"
+
+# expected result for root at mount point:
+#
+# ID 256 gen 8 top level 5 path sub1
+# ID 258 gen 7 top level 256 path sub1/subsub1
+# ID 259 gen 10 top level 5 path sub2
+# ID 260 gen 9 top level 259 path sub2/dir/subsub2
+# ID 261 gen 10 top level 259 path sub2/dir/dirdir/subsubX
+# ID 262 gen 14 top level 5 path sub3
+# ID 263 gen 12 top level 262 path sub3/subsub3
+# ID 264 gen 13 top level 262 path sub3/dir/subsubY
+# ID 265 gen 17 top level 5 path sub4
+# ID 266 gen 15 top level 265 path sub4/subsub4
+# ID 267 gen 16 top level 265 path sub4/dir/subsubZ
+
+# check for root for both absolute/relative path
+all=(256 258 259 260 261 262 263 264 265 266 267)
+test_list 1 "$TEST_MNT" "${all[@]}"
+test_list 1 "$TEST_MNT/sub1" "256 258"
+test_list 1 "$TEST_MNT/sub1/dir" ""
+test_list 1 "$TEST_MNT/sub2" "259 260 261"
+test_list 1 "$TEST_MNT/sub2/dir" "260 261"
+test_list 1 "$TEST_MNT/sub3" "262 263 264"
+test_list 1 "$TEST_MNT/sub4" "265 266 267"
+run_mustfail "should fail for file" \
+   $SUDO_HELPER "$TOP/btrfs" subvolume list "$TEST_MNT/file"
+
+test_list 1 "." "${all[@]}"
+test_list 1 "sub1" "256 258"
+test_list 1 "sub1/dir" ""
+test_list 1 "sub2" "259 260 261"
+test_list 1 "sub2/dir" "260 261"
+test_list 1 "sub3" "262 263 264"
+test_list 1 "sub4" "265 266 267"
+run_mustfail "should fail for file" \
+   $SUDO_HELPER "$TOP/btrfs" subvolume list "file"
+
+# check for normal user for both absolute/relative path
+test_list 0 "$TEST_MNT" "256 258 259 260 261 265 266"
+test_list 0 "$TEST_MNT/sub1" "256 258"
+test_list 0 "$TEST_MNT/sub1/dir"

[PATCH v2 16/20] btrfs-progs: utils: Fallback to open without O_NOATIME flag in find_mount_root():

2018-06-18 Thread Misono Tomohiro
The O_NOATIME flag requires that the effective UID of the process matches
the file's owner or that the process has the CAP_FOWNER capability. Fall
back to open without the O_NOATIME flag so that a non-privileged user can
also call find_mount_root().

This is a preparation work to allow non-privileged user to call
"subvolume show".

Signed-off-by: Misono Tomohiro 
---
 utils.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/utils.c b/utils.c
index 21de09d3..1a4366e8 100644
--- a/utils.c
+++ b/utils.c
@@ -2048,6 +2048,9 @@ int find_mount_root(const char *path, char **mount_root)
char *longest_match = NULL;
 
fd = open(path, O_RDONLY | O_NOATIME);
+   if (fd < 0 && errno == EPERM)
+   fd = open(path, O_RDONLY);
+
if (fd < 0)
return -errno;
close(fd);
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 08/20] btrfs-progs: sub list: factor out main part of btrfs_list_subvols

2018-06-18 Thread Misono Tomohiro
No functional changes.
This is a preparation work for reworking "subvolume list".

Signed-off-by: Misono Tomohiro 
---
 cmds-subvolume.c | 50 ++
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index c54a8003..279a6e51 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -1125,22 +1125,15 @@ out:
return subvols;
 }
 
-static struct subvol_list *btrfs_list_subvols(int fd,
- struct btrfs_list_filter_set_v2 
*filter_set)
+static void get_subvols_info(struct subvol_list **subvols,
+struct btrfs_list_filter_set_v2 *filter_set,
+int fd,
+size_t *capacity)
 {
-   struct subvol_list *subvols;
-   size_t capacity = 0;
struct btrfs_util_subvolume_iterator *iter;
enum btrfs_util_error err;
int ret = -1;
 
-   subvols = malloc(sizeof(*subvols));
-   if (!subvols) {
-   error("out of memory");
-   return NULL;
-   }
-   subvols->num = 0;
-
err = btrfs_util_create_subvolume_iterator_fd(fd,
  BTRFS_FS_TREE_OBJECTID, 0,
  );
@@ -1168,11 +1161,11 @@ static struct subvol_list *btrfs_list_subvols(int fd,
continue;
}
 
-   if (subvols->num >= capacity) {
+   if ((*subvols)->num >= *capacity) {
struct subvol_list *new_subvols;
-   size_t new_capacity = max_t(size_t, 1, capacity * 2);
+   size_t new_capacity = max_t(size_t, 1, *capacity * 2);
 
-   new_subvols = realloc(subvols,
+   new_subvols = realloc(*subvols,
  sizeof(*new_subvols) +
  new_capacity *
  sizeof(new_subvols->subvols[0]));
@@ -1181,12 +1174,12 @@ static struct subvol_list *btrfs_list_subvols(int fd,
goto out;
}
 
-   subvols = new_subvols;
-   capacity = new_capacity;
+   *subvols = new_subvols;
+   *capacity = new_capacity;
}
 
-   subvols->subvols[subvols->num] = subvol;
-   subvols->num++;
+   (*subvols)->subvols[(*subvols)->num] = subvol;
+   (*subvols)->num++;
}
 
ret = 0;
@@ -1194,9 +1187,26 @@ out:
if (iter)
btrfs_util_destroy_subvolume_iterator(iter);
if (ret) {
-   free_subvol_list(subvols);
-   subvols = NULL;
+   free_subvol_list(*subvols);
+   *subvols = NULL;
+   }
+}
+
+static struct subvol_list *btrfs_list_subvols(int fd,
+ struct btrfs_list_filter_set_v2 
*filter_set)
+{
+   struct subvol_list *subvols;
+   size_t capacity = 0;
+
+   subvols = malloc(sizeof(*subvols));
+   if (!subvols) {
+   error("out of memory");
+   return NULL;
}
+   subvols->num = 0;
+
+   get_subvols_info(, filter_set, fd, );
+
return subvols;
 }
 
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] btrfs: Add test that checks rmdir(2) can delete a subvolume

2018-06-13 Thread Misono Tomohiro
Add btrfs test that checks "rmdir" or "rm -r" command can delete a
subvolume like an ordinary directory.

This behavior has been restricted for a long time but is now allowed by
the following commit in the kernel:
  btrfs: Allow rmdir(2) to delete an empty subvolume

The test will be skipped if the kernel does not support the feature,
which can be checked by whether /sys/fs/btrfs/features/rmdir_subvol
exists or not.

Reviewed-by: David Sterba 
Signed-off-by: Misono Tomohiro 
---
changelog:
 v2 -> v3 - Skip test if kernel does not support the feature by
checking sysfs
  - Update license notation
 
 This test should pass on kernel 4.18-rc1~ (or on current Linus' master),
 otherwise it will be skipped. Please change the test number appropriately
 when applying, as other pending tests exist.

 Thanks,
 Tomohiro Misono

 tests/btrfs/200 | 128 
 tests/btrfs/200.out |   2 +
 tests/btrfs/group   |   1 +
 3 files changed, 131 insertions(+)
 create mode 100755 tests/btrfs/200
 create mode 100644 tests/btrfs/200.out

diff --git a/tests/btrfs/200 b/tests/btrfs/200
new file mode 100755
index ..15213eed
--- /dev/null
+++ b/tests/btrfs/200
@@ -0,0 +1,128 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Fujitsu. All Rights Reserved.
+#
+# FS QA Test btrfs/200
+#
+# QA test that checks rmdir(2) works for subvolumes like ordinary directories.
+#
+# This behavior has been restricted long time but becomes allowed by following
+# patch in the kernel:
+#   btrfs: Allow rmdir(2) to delete an empty subvolume
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+   cd /
+   rm -f $tmp.*
+}
+
+create_subvol()
+{
+   $BTRFS_UTIL_PROG subvolume create $1 >> $seqres.full 2>&1
+}
+
+create_snapshot()
+{
+   $BTRFS_UTIL_PROG subvolume snapshot $@ >> $seqres.full 2>&1
+}
+
+rmdir_subvol()
+{
+   rmdir $1 >> $seqres.full 2>&1
+}
+
+rm_r_subvol() {
+   rm -r $1 >> $seqres.full 2>&1
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+
+if [ ! -e /sys/fs/btrfs/features/rmdir_subvol ]; then
+   _notrun "The kernel does not support the deletion of subvolume by rmdir"
+fi
+
+_scratch_mkfs > /dev/null 2>&1 || _fail "mkfs failed"
+_scratch_mount
+
+# Check that an empty subvolume can be deleted by rmdir
+create_subvol $SCRATCH_MNT/sub1
+rmdir_subvol $SCRATCH_MNT/sub1 || \
+   echo "rmdir should delete an empty subvolume"
+
+# Check that non-empty subvolume cannot be deleted by rmdir
+create_subvol $SCRATCH_MNT/sub2
+touch $SCRATCH_MNT/sub2/file
+rmdir_subvol $SCRATCH_MNT/sub2 && \
+   echo "rmdir should fail for non-empty subvolume"
+rm $SCRATCH_MNT/sub2/file
+rmdir_subvol $SCRATCH_MNT/sub2 || \
+   echo "rmdir should delete an empty subvolume"
+
+# Check that read-only empty subvolume can be deleted by rmdir
+create_subvol $SCRATCH_MNT/sub3
+create_snapshot -r $SCRATCH_MNT/sub3 $SCRATCH_MNT/snap
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT/sub3 ro true >> $seqres.full 2>&1
+rmdir_subvol $SCRATCH_MNT/sub3 || \
+   echo "rmdir should delete an empty subvolume"
+rmdir_subvol $SCRATCH_MNT/snap || \
+   echo "rmdir should delete a readonly empty subvolume"
+
+# Check that the default subvolume cannot be deleted by rmdir
+create_subvol $SCRATCH_MNT/sub4
+subvolid=$(_btrfs_get_subvolid $SCRATCH_MNT sub4)
+$BTRFS_UTIL_PROG subvolume set-default $subvolid $SCRATCH_MNT \
+   >> $seqres.full 2>&1
+rmdir_subvol $SCRATCH_MNT/sub4 && \
+   echo "rmdir should fail for the default subvolume"
+
+# Check that subvolume stub (created by snapshot) can be deleted by rmdir
+# (Note: this has been always allowed)
+create_subvol $SCRATCH_MNT/sub5
+create_subvol $SCRATCH_MNT/sub5/sub6
+create_snapshot $SCRATCH_MNT/sub5 $SCRATCH_MNT/snap2
+rmdir $SCRATCH_MNT/snap2/sub6 || \
+   echo "rmdir should delete a subvolume stub (ino number == 2)"
+
+# Check that rm -r works for both non-snapshot subvolume and snapshot
+create_subvol $SCRATCH_MNT/sub7
+mkdir $SCRATCH_MNT/sub7/dir
+create_subvol $SCRATCH_MNT/sub7/dir/sub8
+touch $SCRATCH_MNT/sub7/dir/sub8/file
+
+create_snapshot $SCRATCH_MNT/sub7 $SCRATCH_MNT/snap3
+create_snapshot -r $SCRATCH_MNT/sub7 $SCRATCH_MNT/snap4
+
+rm_r_subvol $SCRATCH_MNT/sub7 || \
+   echo "rm -r should delete subvolumes recursively"
+rm_r_subvol $SCRATCH_MNT/snap3 || \
+   echo "rm -r should delete subvol

Re: [PATCH] btrfs-progs: check: Initialize all filed of btrfs_inode_item in insert_inode_item()

2018-06-07 Thread Misono Tomohiro
On 2018/06/07 21:22, David Sterba wrote:
> On Thu, Jun 07, 2018 at 11:49:58AM +0900, Misono Tomohiro wrote:
>> Initialize all filed of btrfs_inode_item to zero in order to prevent
>> having some garbage, especially for flags field.
> 
> Have you observed in practice or is it a matter of precaution?

I saw failure of fsck-test/010 in yesterday's devel branch and
made this patch. It turned out that root cause was wrong flag comparison
in btrfs check.
(https://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg77758.html)

With Su's fix, failure of fsck-test/010 is also gone without this patch,
but it is better to initialize the variables anyway.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs-progs: check: Initialize all filed of btrfs_inode_item in insert_inode_item()

2018-06-06 Thread Misono Tomohiro
Initialize all fields of btrfs_inode_item to zero in order to prevent
having some garbage, especially for the flags field.

Signed-off-by: Misono Tomohiro 
---
 check/mode-common.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/check/mode-common.c b/check/mode-common.c
index db9e4299..15e2bbd1 100644
--- a/check/mode-common.c
+++ b/check/mode-common.c
@@ -379,18 +379,14 @@ int insert_inode_item(struct btrfs_trans_handle *trans,
time_t now = time(NULL);
int ret;
 
+   memset(, 0, sizeof(ii));
btrfs_set_stack_inode_size(, size);
btrfs_set_stack_inode_nbytes(, nbytes);
btrfs_set_stack_inode_nlink(, nlink);
btrfs_set_stack_inode_mode(, mode);
btrfs_set_stack_inode_generation(, trans->transid);
-   btrfs_set_stack_timespec_nsec(, 0);
btrfs_set_stack_timespec_sec(, now);
-   btrfs_set_stack_timespec_nsec(, 0);
btrfs_set_stack_timespec_sec(, now);
-   btrfs_set_stack_timespec_nsec(, 0);
-   btrfs_set_stack_timespec_sec(, 0);
-   btrfs_set_stack_timespec_nsec(, 0);
 
ret = btrfs_insert_inode(trans, root, ino, );
ASSERT(!ret);
-- 
2.14.4


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/3] btrfs-progs: check: check symlinks with append/immutable flags

2018-06-06 Thread Misono Tomohiro



On 2018/05/15 10:33, Su Yue wrote:
> Define new macro I_ERR_ODD_INODE_FLAGS to represents odd inode flags.
> 
> Symlinks should never have append/immutable flags.
> While processing inodes, if found a symlink with append/immutable
> flags, mark the inode record with I_ERR_ODD_INODE_FLAGS.
> 
> This is for original mode.
> 
> Signed-off-by: Su Yue 
> ---
>  check/main.c  | 7 +++
>  check/mode-original.h | 1 +
>  2 files changed, 8 insertions(+)
> 
> diff --git a/check/main.c b/check/main.c
> index 68da994f7ae0..c764fc011ded 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -576,6 +576,8 @@ static void print_inode_error(struct btrfs_root *root, 
> struct inode_record *rec)
>   fprintf(stderr, ", link count wrong");
>   if (errors & I_ERR_FILE_EXTENT_ORPHAN)
>   fprintf(stderr, ", orphan file extent");
> + if (errors & I_ERR_ODD_INODE_FLAGS)
> + fprintf(stderr, ", odd inode flags");
>   fprintf(stderr, "\n");
>   /* Print the orphan extents if needed */
>   if (errors & I_ERR_FILE_EXTENT_ORPHAN)
> @@ -805,6 +807,7 @@ static int process_inode_item(struct extent_buffer *eb,
>  {
>   struct inode_record *rec;
>   struct btrfs_inode_item *item;
> + u64 flags;
>  
>   rec = active_node->current;
>   BUG_ON(rec->ino != key->objectid || rec->refs > 1);
> @@ -822,6 +825,10 @@ static int process_inode_item(struct extent_buffer *eb,
>   rec->found_inode_item = 1;
>   if (rec->nlink == 0)
>   rec->errors |= I_ERR_NO_ORPHAN_ITEM;
> + flags = btrfs_inode_flags(eb, item);
> + if (rec->imode & BTRFS_FT_SYMLINK &&

Hello,

I observed that this commit causes test-convert/009 to fail in the current
kdave/devel branch.
Since rec->imode uses S_IFLNK (0xa000) for symbolic link and BTRFS_FT_SYMLINK 
is 7,
above statement does not work well. Shouldn't we use S_ISLNK(rec->imode) 
instead?

Thanks,
Tomohiro Misono

> + flags & (BTRFS_INODE_IMMUTABLE | BTRFS_INODE_APPEND))
> + rec->errors |= I_ERR_ODD_INODE_FLAGS;
>   maybe_free_inode_rec(_node->inode_cache, rec);
>   return 0;
>  }
> diff --git a/check/mode-original.h b/check/mode-original.h
> index 368de692fdd1..13cfa5b9e1b3 100644
> --- a/check/mode-original.h
> +++ b/check/mode-original.h
> @@ -186,6 +186,7 @@ struct file_extent_hole {
>  #define I_ERR_LINK_COUNT_WRONG   (1 << 13)
>  #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
>  #define I_ERR_FILE_EXTENT_TOO_LARGE  (1 << 15)
> +#define I_ERR_ODD_INODE_FLAGS(1 << 16)
>  
>  struct inode_record {
>   struct list_head backrefs;
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: Check error of btrfs_iget() in btrfs_search_path_in_tree_user()

2018-06-04 Thread Misono Tomohiro
Signed-off-by: Misono Tomohiro 
---

Hi david,

It seems that the v8 patch I sent last week was missed, and the commit in the
misc-next tree is also slightly updated from v7, so I am resending the fix as
a separate patch.

Please fold this patch to current misc-next (head is the commit to be fixed:
"btrfs: Add unprivileged version of ino_lookup ioctl").

Thanks,
Tomohiro Misono

 fs/btrfs/ioctl.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d29992f7dc63..5556e9ea2a4b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2438,6 +2438,10 @@ static int btrfs_search_path_in_tree_user(struct inode 
*inode,
}
 
temp_inode = btrfs_iget(sb, , root, NULL);
+   if (IS_ERR(temp_inode)) {
+   ret = PTR_ERR(temp_inode);
+   goto out;
+   }
ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
iput(temp_inode);
if (ret) {
-- 
2.14.3


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: checking for NULL instead of IS_ERR

2018-05-31 Thread Misono Tomohiro
Thanks again.

David, could you apply this on top of my v7 patch in for-next tree?

Regards,
Tomohiro Misono

On 2018/05/31 15:21, Dan Carpenter wrote:
> memdup_user() returns error pointers, it doesn't return NULL.
> 
> Fixes: 01141b08dee5 ("btrfs: Add unprivileged ioctl which returns subvolume's 
> ROOT_REF")
> Signed-off-by: Dan Carpenter 
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index ff5018587bd9..d8dd4504bdab 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -2791,9 +2791,9 @@ static noinline int 
> btrfs_ioctl_get_subvol_rootref(struct file *file,
>   return -ENOMEM;
>  
>   rootrefs = memdup_user(argp, sizeof(*rootrefs));
> - if (!rootrefs) {
> + if (IS_ERR(rootrefs)) {
>   btrfs_free_path(path);
> - return -ENOMEM;
> + return PTR_ERR(rootrefs);
>   }
>  
>   inode = file_inode(file);
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 3/3] btrfs: Add unprivileged version of ino_lookup ioctl

2018-05-30 Thread Misono Tomohiro
Add unprivileged version of ino_lookup ioctl BTRFS_IOC_INO_LOOKUP_USER
to allow normal users to call "btrfs subvolume list/show" etc. in
combination with BTRFS_IOC_GET_SUBVOL_INFO/BTRFS_IOC_GET_SUBVOL_ROOTREF.

This can be used like BTRFS_IOC_INO_LOOKUP but the argument is
different. This is because it always searches the fs/file tree
corresponding to the fd with which this ioctl is called and also
returns the name of the bottom subvolume.

The main differences from original ino_lookup ioctl are:
  1. Read + Exec permission will be checked using inode_permission()
 during path construction. -EACCES will be returned in case
 of failure.
  2. Path construction will be stopped at the inode number which
 corresponds to the fd with which this ioctl is called. If
 constructed path does not exist under fd's inode, -EACCES
 will be returned.
  3. The name of bottom subvolume is also searched and filled.

Note that the maximum path length is 256 (BTRFS_VOL_NAME_MAX+1) bytes shorter
than that of the ino_lookup ioctl, because space is reserved for the
subvolume's name.

Reviewed-by: Gu Jinxiang 
Reviewed-by: Qu Wenruo 
Tested-by: Gu Jinxiang 
Signed-off-by: Misono Tomohiro 
---
 v7 -> v8
   - check return value of btrfs_iget(), which is reported by Dan Carpenter

 fs/btrfs/ioctl.c   | 208 +
 include/uapi/linux/btrfs.h |  17 
 2 files changed, 225 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ce28593e8622..811ac39be0c5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2341,6 +2341,170 @@ static noinline int btrfs_search_path_in_tree(struct 
btrfs_fs_info *info,
return ret;
 }
 
+static noinline int btrfs_search_path_in_tree_user(struct inode *inode,
+   struct btrfs_ioctl_ino_lookup_user_args *args)
+{
+   struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+   struct super_block *sb = inode->i_sb;
+   struct btrfs_key upper_limit = BTRFS_I(inode)->location;
+   u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
+   u64 dirid = args->dirid;
+
+   unsigned long item_off;
+   unsigned long item_len;
+   struct btrfs_inode_ref *iref;
+   struct btrfs_root_ref *rref;
+   struct btrfs_root *root;
+   struct btrfs_path *path;
+   struct btrfs_key key, key2;
+   struct extent_buffer *l;
+   struct inode *temp_inode;
+   char *ptr;
+   int slot;
+   int len;
+   int total_len = 0;
+   int ret;
+
+   path = btrfs_alloc_path();
+   if (!path)
+   return -ENOMEM;
+
+   /*
+* If the bottom subvolume does not exist directly under upper_limit,
+* construct the path in bottomup way.
+*/
+   if (dirid != upper_limit.objectid) {
+   ptr = >path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
+
+   key.objectid = treeid;
+   key.type = BTRFS_ROOT_ITEM_KEY;
+   key.offset = (u64)-1;
+   root = btrfs_read_fs_root_no_name(fs_info, );
+   if (IS_ERR(root)) {
+   ret = PTR_ERR(root);
+   goto out;
+   }
+
+   key.objectid = dirid;
+   key.type = BTRFS_INODE_REF_KEY;
+   key.offset = (u64)-1;
+   while (1) {
+   ret = btrfs_search_slot(NULL, root, , path, 0, 0);
+   if (ret < 0) {
+   goto out;
+   } else if (ret > 0) {
+   ret = btrfs_previous_item(root, path, dirid,
+ BTRFS_INODE_REF_KEY);
+   if (ret < 0) {
+   goto out;
+   } else if (ret > 0) {
+   ret = -ENOENT;
+   goto out;
+   }
+   }
+
+   l = path->nodes[0];
+   slot = path->slots[0];
+   btrfs_item_key_to_cpu(l, , slot);
+
+   iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
+   len = btrfs_inode_ref_name_len(l, iref);
+   ptr -= len + 1;
+   total_len += len + 1;
+   if (ptr < args->path) {
+   ret = -ENAMETOOLONG;
+   goto out;
+   }
+
+   *(ptr + len) = '/';
+   read_extent_buffer(l, ptr,
+   (unsigned long)(iref + 1), len);
+
+   /* Check the read+exec permission of this directory */
+   ret = btrfs_previous_item(root, path, dirid,
+  

Re: [bug report] btrfs: Add unprivileged version of ino_lookup ioctl

2018-05-30 Thread Misono Tomohiro
Thanks for the report.
I will update the patch.

Regards,
Tomohiro Misono

On 2018/05/30 19:19, Dan Carpenter wrote:
> Hello Tomohiro Misono,
> 
> The patch 56bfce6502b7: "btrfs: Add unprivileged version of
> ino_lookup ioctl" from May 16, 2018, leads to the following static
> checker warning:
> 
>   fs/btrfs/ioctl.c:2478 btrfs_search_path_in_tree_user()
>   error: 'temp_inode' dereferencing possible ERR_PTR()
> 
> fs/btrfs/ioctl.c
>   2469  l = path->nodes[0];
>   2470  slot = path->slots[0];
>   2471  btrfs_item_key_to_cpu(l, , slot);
>   2472  if (key2.objectid != dirid) {
>   2473  ret = -ENOENT;
>   2474  goto out;
>   2475  }
>   2476  
>   2477  temp_inode = btrfs_iget(sb, , root, 
> NULL);
> ^^^
>   2478  ret = inode_permission(temp_inode, MAY_READ | 
> MAY_EXEC);
>^^
>   2479  iput(temp_inode);
>   2480  if (ret) {
>   2481  ret = -EACCES;
>   2482  goto out;
>   2483  }
>   2484  
> 
> 
> regards,
> dan carpenter
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   3   4   >