[PATCH v2] btrfs-progs: qgroup: add sync option to 'qgroup show'

2016-12-06 Thread Tsutomu Itoh
The 'qgroup show' command does not synchronize filesystem.
Therefore, 'qgroup show' may not display the correct value unless
synchronized with 'filesystem sync' command etc.

So add the '--sync' and '--no-sync' options so that we can choose
whether or not to synchronize when executing the command.

Signed-off-by: Tsutomu Itoh 
---
v2: use getopt_long with enum instead of single letter (suggested by Qu)
---
 Documentation/btrfs-qgroup.asciidoc |  6 ++
 cmds-qgroup.c   | 33 +
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/Documentation/btrfs-qgroup.asciidoc 
b/Documentation/btrfs-qgroup.asciidoc
index 438dbc7..9c65795 100644
--- a/Documentation/btrfs-qgroup.asciidoc
+++ b/Documentation/btrfs-qgroup.asciidoc
@@ -126,6 +126,12 @@ Prefix \'+' means ascending order and \'-' means 
descending order of .
 If no prefix is given, use ascending order by default.
 +
 If multiple s is given, use comma to separate.
++
+--sync
+To retrieve information after updating the status of qgroups,
+invoke sync before getting information.
+--no-sync
+Do not invoke sync before getting information (default).
 
 EXIT STATUS
 ---
diff --git a/cmds-qgroup.c b/cmds-qgroup.c
index bc15077..b494f6f 100644
--- a/cmds-qgroup.c
+++ b/cmds-qgroup.c
@@ -272,8 +272,7 @@ static int cmd_qgroup_destroy(int argc, char **argv)
 }
 
 static const char * const cmd_qgroup_show_usage[] = {
-   "btrfs qgroup show -pcreFf "
-   "[--sort=qgroupid,rfer,excl,max_rfer,max_excl] ",
+   "btrfs qgroup show [options] ",
"Show subvolume quota groups.",
"-p print parent qgroup id",
"-c print child qgroup id",
@@ -288,6 +287,8 @@ static const char * const cmd_qgroup_show_usage[] = {
"   list qgroups sorted by specified items",
"   you can use '+' or '-' in front of each item.",
"   (+:ascending, -:descending, ascending default)",
+   "--sync invoke sync before getting info",
+   "--no-sync  do not invoke sync before getting info (default)",
NULL
 };
 
@@ -301,6 +302,7 @@ static int cmd_qgroup_show(int argc, char **argv)
u64 qgroupid;
int filter_flag = 0;
unsigned unit_mode;
+   int sync = 0;
 
struct btrfs_qgroup_comparer_set *comparer_set;
struct btrfs_qgroup_filter_set *filter_set;
@@ -311,8 +313,15 @@ static int cmd_qgroup_show(int argc, char **argv)
 
while (1) {
int c;
+   enum {
+   GETOPT_VAL_SORT = 256,
+   GETOPT_VAL_SYNC,
+   GETOPT_VAL_NO_SYNC
+   };
static const struct option long_options[] = {
-   {"sort", required_argument, NULL, 'S'},
+   {"sort", required_argument, NULL, GETOPT_VAL_SORT},
+   {"sync", no_argument, NULL, GETOPT_VAL_SYNC},
+   {"no-sync", no_argument, NULL, GETOPT_VAL_NO_SYNC},
{ NULL, 0, NULL, 0 }
};
 
@@ -342,12 +351,18 @@ static int cmd_qgroup_show(int argc, char **argv)
case 'f':
filter_flag |= 0x2;
break;
-   case 'S':
+   case GETOPT_VAL_SORT:
ret = btrfs_qgroup_parse_sort_string(optarg,
 _set);
if (ret)
usage(cmd_qgroup_show_usage);
break;
+   case GETOPT_VAL_SYNC:
+   sync = 1;
+   break;
+   case GETOPT_VAL_NO_SYNC:
+   sync = 0;
+   break;
default:
usage(cmd_qgroup_show_usage);
}
@@ -365,6 +380,16 @@ static int cmd_qgroup_show(int argc, char **argv)
return 1;
}
 
+   if (sync) {
+   ret = ioctl(fd, BTRFS_IOC_SYNC);
+   if (ret < 0) {
+   error("sync ioctl failed on '%s': %s", path,
+ strerror(errno));
+   close_file_or_dir(fd, dirstream);
+   goto out;
+   }
+   }
+
if (filter_flag) {
ret = lookup_path_rootid(fd, );
if (ret < 0) {
-- 
2.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: qgroup: add sync option to 'qgroup show'

2016-12-06 Thread Tsutomu Itoh
On 2016/12/07 13:59, Qu Wenruo wrote:
> 
> 
> At 12/07/2016 12:31 PM, Tsutomu Itoh wrote:
>> Hi Qu,
>>
>> Thanks for the review.
>>
>> On 2016/12/07 12:24, Qu Wenruo wrote:
>>>
>>>
>>> At 12/07/2016 11:07 AM, Tsutomu Itoh wrote:
 The 'qgroup show' command does not synchronize filesystem.
 Therefore, 'qgroup show' may not display the correct value unless
 synchronized with 'filesystem sync' command etc.

 So add the '--sync' and '--no-sync' options so that we can choose
 whether or not to synchronize when executing the command.
>>>
>>> Indeed, --sync would help in a lot of cases.
>>>

 Signed-off-by: Tsutomu Itoh 
 ---
  Documentation/btrfs-qgroup.asciidoc |  5 +
  cmds-qgroup.c   | 24 ++--
  2 files changed, 27 insertions(+), 2 deletions(-)

 diff --git a/Documentation/btrfs-qgroup.asciidoc 
 b/Documentation/btrfs-qgroup.asciidoc
 index 438dbc7..dd133ec 100644
 --- a/Documentation/btrfs-qgroup.asciidoc
 +++ b/Documentation/btrfs-qgroup.asciidoc
 @@ -126,6 +126,11 @@ Prefix \'+' means ascending order and \'-' means 
 descending order of .
  If no prefix is given, use ascending order by default.
  +
  If multiple s is given, use comma to separate.
 ++
 +--sync
 +invoke sync before getting info.
>>>
>>> A little more words would help, to info user that qgroup will only update 
>>> after sync.
>>>
 +--no-sync
 +do not invoke sync before getting info (default).

  EXIT STATUS
  ---
 diff --git a/cmds-qgroup.c b/cmds-qgroup.c
 index bc15077..ac339f3 100644
 --- a/cmds-qgroup.c
 +++ b/cmds-qgroup.c
 @@ -272,8 +272,7 @@ static int cmd_qgroup_destroy(int argc, char **argv)
  }

  static const char * const cmd_qgroup_show_usage[] = {
 -"btrfs qgroup show -pcreFf "
 -"[--sort=qgroupid,rfer,excl,max_rfer,max_excl] ",
 +"btrfs qgroup show [options] ",
  "Show subvolume quota groups.",
  "-p print parent qgroup id",
  "-c print child qgroup id",
 @@ -288,6 +287,8 @@ static const char * const cmd_qgroup_show_usage[] = {
  "   list qgroups sorted by specified items",
  "   you can use '+' or '-' in front of each item.",
  "   (+:ascending, -:descending, ascending default)",
 +"--sync invoke sync before getting info",
 +"--no-sync  do not invoke sync before getting info (default)",
>>>
>>> I see you're using -Y and -N option, it's better also to show
>>> the short option together, if we will use that short option.
>>
>> Do you think that a short option is necessary?
>> I thought it was not necessary...
> 
> Just mean if we use short option in getopt, it's better to mention it in both 
> man page and help string.
> 
>>
>>>
  NULL
  };

 @@ -301,6 +302,7 @@ static int cmd_qgroup_show(int argc, char **argv)
  u64 qgroupid;
  int filter_flag = 0;
  unsigned unit_mode;
 +int sync = 0;

  struct btrfs_qgroup_comparer_set *comparer_set;
  struct btrfs_qgroup_filter_set *filter_set;
 @@ -313,6 +315,8 @@ static int cmd_qgroup_show(int argc, char **argv)
  int c;
  static const struct option long_options[] = {
  {"sort", required_argument, NULL, 'S'},
 +{"sync", no_argument, NULL, 'Y'},
 +{"no-sync", no_argument, NULL, 'N'},
>>>
>>> Although I'm not sure if "-Y" and "-N" is proper here.
>>> IMHO, it's more like something to say all "Yes" or "No" to any interactive 
>>> confirmation.
>>>
>>> Maybe non-charactor option will be better.
>>
>> Umm, these mean s(Y)nc/(N)osync, and the user can not specify short 
>> options...
>> Still would you rather change to another character?
> 
> If not using short option, it's better to use non-character value instead of 
> 'Y' and 'N'.
> 
> Use any number larger than 256 would do the trick, just like we did in qgroup 
> assign:
> 
> enum { GETOPT_VAL_RESCAN = 256, GETOPT_VAL_NO_RESCAN };
> static const struct option long_options[] = {
> { "rescan", no_argument, NULL, GETOPT_VAL_RESCAN },
> { "no-rescan", no_argument, NULL, GETOPT_VAL_NO_RESCAN },
> { NULL, 0, NULL, 0 }
> };
> int c = getopt_long(argc, argv, "", long_options, NULL);
> 
> BTW, I'm completely OK not to use short option.
> Just the "Y" and "N" a little confusing to me since they are not mentioned 
> anywhere.

OK, thanks. I'll post v2 patch soon.

Thanks,
Tsutomu

> 
> Thanks,
> Qu
>>
>> Thanks,
>> Tsutomu
>>
>>>
>>> Other part looks good to me.
>>>
>>> Thanks,
>>> Qu
>>>
  { NULL, 0, NULL, 0 }
  };

 @@ -348,6 +352,12 @@ static int cmd_qgroup_show(int argc, char **argv)
  if (ret)
  

Couldn't delete a directory after power failure

2016-12-06 Thread Dmitrii Tcvetkov
Hello,

I had this problem today after power failure of the pc. Bitcoin wallet couldn't 
use his database, it said that the database is corrupted, 
so I decided to delete blockchain database from the wallet. But I couldn't 
delete directory "chainstate". 

The problem is solved at the moment I wrote the message to the list by btrfs 
check and I deleted corrupted directory so no need for support :). Just posting 
in case this trace and Sysrq+w may be useful for somebody.

07:28:40-user@host ~ $ btrfs version
btrfs-progs v4.8.5
07:29:00-user@host ~ $ uname -a
Linux host 4.9.0-rc8 #2 SMP PREEMPT Tue Dec 6 23:10:04 MSK 2016 x86_64 GNU/Linux
07:29:10 -user@host ~ $ sudo btrfs fi df storage
Data, single: total=494.00GiB, used=392.04GiB
System, DUP: total=32.00MiB, used=96.00KiB
Metadata, DUP: total=1.50GiB, used=532.12MiB
GlobalReserve, single: total=506.92MiB, used=0.00B
07:29:12-user@host ~ $ sudo btrfs fi sh storage
Label: 'storage'  uuid: f387eb37-f009-4723-9fda-2cc8f94c8b8d
Total devices 1 FS bytes used 392.55GiB
devid1 size 996.26GiB used 497.06GiB path /dev/mapper/container
07:29:12-user@host ~/storage/.bitcoin $ ls -l
drwx-- 1 user user   22250 Dec  7 07:20 chainstate
-rw--- 1 user user   0 Oct 20 23:11 db.log
-rw-r--r-- 1 user user 7513379 Dec  7 07:23 debug.log
-rw--- 1 user user   28534 Dec  6 20:02 fee_estimates.dat
-rw--- 1 user user 4372424 Dec  7 01:56 peers.dat
-rw--- 1 user user  139264 Dec  7 01:57 wallet.dat
07:29:13-user@host ~/storage/.bitcoin $ rm -rf chainstate/
Segmentation fault
07:29:19-user@host ~/storage/.bitcoin $ ls -l
total 4436
drwx-- 1 user user1892 Dec  7 07:29 chainstate
-rw--- 1 user user   0 Oct 20 23:11 db.log
-rw--- 1 user user   28534 Dec  6 20:02 fee_estimates.dat
-rw--- 1 user user 4372424 Dec  7 01:56 peers.dat
-rw--- 1 user user  139264 Dec  7 01:57 wallet.dat
07:29:24-user@host ~/storage/.bitcoin $ rm -rf chainstate/

After that rm hanged, subsequent ls of the chainstate directory also hanged.

dmesg with Sysrq+W included:

[  190.429798] BTRFS: device label storage devid 1 transid 486419 /dev/dm-6
[  190.459791] BTRFS info (device dm-6): enabling auto defrag
[  190.459796] BTRFS info (device dm-6): force lzo compression
[  190.459797] BTRFS info (device dm-6): using free space tree
[  190.459799] BTRFS info (device dm-6): has skinny extents
[  197.896560] BTRFS info (device dm-6): checking UUID tree
[  715.237873] BTRFS error (device dm-6): err add delayed dir index item(index: 
667) into the deletion tree of the delayed node(root id: 3106, inode id: 1613, 
errno: -17)
[  715.237885] [ cut here ]
[  715.239455] kernel BUG at fs/btrfs/delayed-inode.c:1555!
[  715.241014] invalid opcode:  [#1] PREEMPT SMP
[  715.242575] Modules linked in: radeon ttm
[  715.244143] CPU: 6 PID: 2257 Comm: rm Not tainted 4.9.0-rc8 #2
[  715.245750] Hardware name: To be filled by O.E.M. To be filled by 
O.E.M./SABERTOOTH 990FX R2.0, BIOS 2501 04/08/2014
[  715.247352] task: 9134f45c3200 task.stack: 9ebb0392c000
[  715.248931] RIP: 0010:[]  [] 
btrfs_delete_delayed_dir_index+0x219/0x220
[  715.250508] RSP: 0018:9ebb0392fd68  EFLAGS: 00010286
[  715.252114] RAX:  RBX: 91355e687b00 RCX: 
[  715.253706] RDX:  RSI: 91357ed8c7a8 RDI: 91357ed8c7a8
[  715.255301] RBP: 9134cecc8130 R08: 0003a131 R09: 0005
[  715.256904] R10: 0040 R11: b9f6a12d R12: 9134cecc8178
[  715.258544] R13: 029b R14: 913570bbe000 R15: 91356ae3f500
[  715.260149] FS:  7f102b9c6480() GS:91357ed8() 
knlGS:
[  715.261762] CS:  0010 DS:  ES:  CR0: 80050033
[  715.263358] CR2: 006ceff4 CR3: 000290ddd000 CR4: 000406e0
[  715.264892] Stack:
[  715.266422]  0004 4dff913555d705f0 6006 
029b
[  715.267981]  f32e21fa 913546f60a50 9ebb0392fe40 
913546ea44f0
[  715.269546]  00040ffd 064d 9134d45dc5b0 
b928143c
[  715.271134] Call Trace:
[  715.272683]  [] ? __btrfs_unlink_inode+0x1ac/0x4b0
[  715.274246]  [] ? btrfs_unlink_inode+0x12/0x40
[  715.275797]  [] ? btrfs_unlink+0x61/0xb0
[  715.277371]  [] ? vfs_unlink+0xb9/0x180
[  715.278903]  [] ? do_unlinkat+0x28d/0x310
[  715.280426]  [] ? entry_SYSCALL_64_fastpath+0x13/0x94
[  715.281950] Code: ff 0f 0b 48 8b 55 10 49 8b be f0 01 00 00 41 89 c1 4c 8b 
45 00 48 c7 c6 10 b8 95 b9 48 8b 8a 48 03 00 00 4c 89 ea e8 77 55 f7 ff <0f> 0b 
e8 d0 1e de ff 53 48 89 fb e8 c7 d8 ff ff 48 85 c0 74 32 
[  715.283627] RIP  [] 
btrfs_delete_delayed_dir_index+0x219/0x220
[  715.285213]  RSP 
[  715.293587] ---[ end trace fbbdb097ac89a28e ]---
[  808.445004] sysrq: SysRq : Show Blocked State
[  808.445008]   taskPC stack   pid father
[  808.445085] btrfs-transacti D0   936  2 

Re: [PATCH] btrfs-progs: qgroup: add sync option to 'qgroup show'

2016-12-06 Thread Qu Wenruo



At 12/07/2016 12:31 PM, Tsutomu Itoh wrote:

Hi Qu,

Thanks for the review.

On 2016/12/07 12:24, Qu Wenruo wrote:



At 12/07/2016 11:07 AM, Tsutomu Itoh wrote:

The 'qgroup show' command does not synchronize filesystem.
Therefore, 'qgroup show' may not display the correct value unless
synchronized with 'filesystem sync' command etc.

So add the '--sync' and '--no-sync' options so that we can choose
whether or not to synchronize when executing the command.


Indeed, --sync would help in a lot of cases.



Signed-off-by: Tsutomu Itoh 
---
 Documentation/btrfs-qgroup.asciidoc |  5 +
 cmds-qgroup.c   | 24 ++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/Documentation/btrfs-qgroup.asciidoc 
b/Documentation/btrfs-qgroup.asciidoc
index 438dbc7..dd133ec 100644
--- a/Documentation/btrfs-qgroup.asciidoc
+++ b/Documentation/btrfs-qgroup.asciidoc
@@ -126,6 +126,11 @@ Prefix \'+' means ascending order and \'-' means descending 
order of .
 If no prefix is given, use ascending order by default.
 +
 If multiple s is given, use comma to separate.
++
+--sync
+invoke sync before getting info.


A little more words would help, to info user that qgroup will only update after 
sync.


+--no-sync
+do not invoke sync before getting info (default).

 EXIT STATUS
 ---
diff --git a/cmds-qgroup.c b/cmds-qgroup.c
index bc15077..ac339f3 100644
--- a/cmds-qgroup.c
+++ b/cmds-qgroup.c
@@ -272,8 +272,7 @@ static int cmd_qgroup_destroy(int argc, char **argv)
 }

 static const char * const cmd_qgroup_show_usage[] = {
-"btrfs qgroup show -pcreFf "
-"[--sort=qgroupid,rfer,excl,max_rfer,max_excl] ",
+"btrfs qgroup show [options] ",
 "Show subvolume quota groups.",
 "-p print parent qgroup id",
 "-c print child qgroup id",
@@ -288,6 +287,8 @@ static const char * const cmd_qgroup_show_usage[] = {
 "   list qgroups sorted by specified items",
 "   you can use '+' or '-' in front of each item.",
 "   (+:ascending, -:descending, ascending default)",
+"--sync invoke sync before getting info",
+"--no-sync  do not invoke sync before getting info (default)",


I see you're using -Y and -N option, it's better also to show
the short option together, if we will use that short option.


Do you think that a short option is necessary?
I thought it was not necessary...


Just mean if we use short option in getopt, it's better to mention it in 
both man page and help string.







 NULL
 };

@@ -301,6 +302,7 @@ static int cmd_qgroup_show(int argc, char **argv)
 u64 qgroupid;
 int filter_flag = 0;
 unsigned unit_mode;
+int sync = 0;

 struct btrfs_qgroup_comparer_set *comparer_set;
 struct btrfs_qgroup_filter_set *filter_set;
@@ -313,6 +315,8 @@ static int cmd_qgroup_show(int argc, char **argv)
 int c;
 static const struct option long_options[] = {
 {"sort", required_argument, NULL, 'S'},
+{"sync", no_argument, NULL, 'Y'},
+{"no-sync", no_argument, NULL, 'N'},


Although I'm not sure if "-Y" and "-N" is proper here.
IMHO, it's more like something to say all "Yes" or "No" to any interactive 
confirmation.

Maybe non-charactor option will be better.


Umm, these mean s(Y)nc/(N)osync, and the user can not specify short options...
Still would you rather change to another character?


If not using short option, it's better to use non-character value 
instead of 'Y' and 'N'.


Use any number larger than 256 would do the trick, just like we did in 
qgroup assign:


enum { GETOPT_VAL_RESCAN = 256, GETOPT_VAL_NO_RESCAN };
static const struct option long_options[] = {
{ "rescan", no_argument, NULL, GETOPT_VAL_RESCAN },
{ "no-rescan", no_argument, NULL, GETOPT_VAL_NO_RESCAN },
{ NULL, 0, NULL, 0 }
};
int c = getopt_long(argc, argv, "", long_options, NULL);

BTW, I'm completely OK not to use short option.
Just the "Y" and "N" a little confusing to me since they are not 
mentioned anywhere.


Thanks,
Qu


Thanks,
Tsutomu



Other part looks good to me.

Thanks,
Qu


 { NULL, 0, NULL, 0 }
 };

@@ -348,6 +352,12 @@ static int cmd_qgroup_show(int argc, char **argv)
 if (ret)
 usage(cmd_qgroup_show_usage);
 break;
+case 'Y':
+sync = 1;
+break;
+case 'N':
+sync = 0;
+break;
 default:
 usage(cmd_qgroup_show_usage);
 }
@@ -365,6 +375,16 @@ static int cmd_qgroup_show(int argc, char **argv)
 return 1;
 }

+if (sync) {
+ret = ioctl(fd, BTRFS_IOC_SYNC);
+if (ret < 0) {
+error("sync ioctl failed on '%s': %s", path,
+  strerror(errno));
+close_file_or_dir(fd, dirstream);
+goto out;
+}
+}
+
 if (filter_flag) {
 

Re: [PATCH] btrfs-progs: qgroup: add sync option to 'qgroup show'

2016-12-06 Thread Tsutomu Itoh
Hi Qu,

Thanks for the review.

On 2016/12/07 12:24, Qu Wenruo wrote:
> 
> 
> At 12/07/2016 11:07 AM, Tsutomu Itoh wrote:
>> The 'qgroup show' command does not synchronize filesystem.
>> Therefore, 'qgroup show' may not display the correct value unless
>> synchronized with 'filesystem sync' command etc.
>>
>> So add the '--sync' and '--no-sync' options so that we can choose
>> whether or not to synchronize when executing the command.
> 
> Indeed, --sync would help in a lot of cases.
> 
>>
>> Signed-off-by: Tsutomu Itoh 
>> ---
>>  Documentation/btrfs-qgroup.asciidoc |  5 +
>>  cmds-qgroup.c   | 24 ++--
>>  2 files changed, 27 insertions(+), 2 deletions(-)
>>
>> diff --git a/Documentation/btrfs-qgroup.asciidoc 
>> b/Documentation/btrfs-qgroup.asciidoc
>> index 438dbc7..dd133ec 100644
>> --- a/Documentation/btrfs-qgroup.asciidoc
>> +++ b/Documentation/btrfs-qgroup.asciidoc
>> @@ -126,6 +126,11 @@ Prefix \'+' means ascending order and \'-' means 
>> descending order of .
>>  If no prefix is given, use ascending order by default.
>>  +
>>  If multiple s is given, use comma to separate.
>> ++
>> +--sync
>> +invoke sync before getting info.
> 
> A little more words would help, to info user that qgroup will only update 
> after sync.
> 
>> +--no-sync
>> +do not invoke sync before getting info (default).
>>
>>  EXIT STATUS
>>  ---
>> diff --git a/cmds-qgroup.c b/cmds-qgroup.c
>> index bc15077..ac339f3 100644
>> --- a/cmds-qgroup.c
>> +++ b/cmds-qgroup.c
>> @@ -272,8 +272,7 @@ static int cmd_qgroup_destroy(int argc, char **argv)
>>  }
>>
>>  static const char * const cmd_qgroup_show_usage[] = {
>> -"btrfs qgroup show -pcreFf "
>> -"[--sort=qgroupid,rfer,excl,max_rfer,max_excl] ",
>> +"btrfs qgroup show [options] ",
>>  "Show subvolume quota groups.",
>>  "-p print parent qgroup id",
>>  "-c print child qgroup id",
>> @@ -288,6 +287,8 @@ static const char * const cmd_qgroup_show_usage[] = {
>>  "   list qgroups sorted by specified items",
>>  "   you can use '+' or '-' in front of each item.",
>>  "   (+:ascending, -:descending, ascending default)",
>> +"--sync invoke sync before getting info",
>> +"--no-sync  do not invoke sync before getting info (default)",
> 
> I see you're using -Y and -N option, it's better also to show
> the short option together, if we will use that short option.

Do you think that a short option is necessary?
I thought it was not necessary...

> 
>>  NULL
>>  };
>>
>> @@ -301,6 +302,7 @@ static int cmd_qgroup_show(int argc, char **argv)
>>  u64 qgroupid;
>>  int filter_flag = 0;
>>  unsigned unit_mode;
>> +int sync = 0;
>>
>>  struct btrfs_qgroup_comparer_set *comparer_set;
>>  struct btrfs_qgroup_filter_set *filter_set;
>> @@ -313,6 +315,8 @@ static int cmd_qgroup_show(int argc, char **argv)
>>  int c;
>>  static const struct option long_options[] = {
>>  {"sort", required_argument, NULL, 'S'},
>> +{"sync", no_argument, NULL, 'Y'},
>> +{"no-sync", no_argument, NULL, 'N'},
> 
> Although I'm not sure if "-Y" and "-N" is proper here.
> IMHO, it's more like something to say all "Yes" or "No" to any interactive 
> confirmation.
> 
> Maybe non-charactor option will be better.

Umm, these mean s(Y)nc/(N)osync, and the user can not specify short options...
Still would you rather change to another character?

Thanks,
Tsutomu

> 
> Other part looks good to me.
> 
> Thanks,
> Qu
> 
>>  { NULL, 0, NULL, 0 }
>>  };
>>
>> @@ -348,6 +352,12 @@ static int cmd_qgroup_show(int argc, char **argv)
>>  if (ret)
>>  usage(cmd_qgroup_show_usage);
>>  break;
>> +case 'Y':
>> +sync = 1;
>> +break;
>> +case 'N':
>> +sync = 0;
>> +break;
>>  default:
>>  usage(cmd_qgroup_show_usage);
>>  }
>> @@ -365,6 +375,16 @@ static int cmd_qgroup_show(int argc, char **argv)
>>  return 1;
>>  }
>>
>> +if (sync) {
>> +ret = ioctl(fd, BTRFS_IOC_SYNC);
>> +if (ret < 0) {
>> +error("sync ioctl failed on '%s': %s", path,
>> +  strerror(errno));
>> +close_file_or_dir(fd, dirstream);
>> +goto out;
>> +}
>> +}
>> +
>>  if (filter_flag) {
>>  ret = lookup_path_rootid(fd, );
>>  if (ret < 0) {
>>
> 
> 
> -- 
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: qgroup: add sync option to 'qgroup show'

2016-12-06 Thread Qu Wenruo



At 12/07/2016 11:07 AM, Tsutomu Itoh wrote:

The 'qgroup show' command does not synchronize filesystem.
Therefore, 'qgroup show' may not display the correct value unless
synchronized with 'filesystem sync' command etc.

So add the '--sync' and '--no-sync' options so that we can choose
whether or not to synchronize when executing the command.


Indeed, --sync would help in a lot of cases.



Signed-off-by: Tsutomu Itoh 
---
 Documentation/btrfs-qgroup.asciidoc |  5 +
 cmds-qgroup.c   | 24 ++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/Documentation/btrfs-qgroup.asciidoc 
b/Documentation/btrfs-qgroup.asciidoc
index 438dbc7..dd133ec 100644
--- a/Documentation/btrfs-qgroup.asciidoc
+++ b/Documentation/btrfs-qgroup.asciidoc
@@ -126,6 +126,11 @@ Prefix \'+' means ascending order and \'-' means descending 
order of .
 If no prefix is given, use ascending order by default.
 +
 If multiple s is given, use comma to separate.
++
+--sync
+invoke sync before getting info.


A little more words would help, to info user that qgroup will only 
update after sync.



+--no-sync
+do not invoke sync before getting info (default).

 EXIT STATUS
 ---
diff --git a/cmds-qgroup.c b/cmds-qgroup.c
index bc15077..ac339f3 100644
--- a/cmds-qgroup.c
+++ b/cmds-qgroup.c
@@ -272,8 +272,7 @@ static int cmd_qgroup_destroy(int argc, char **argv)
 }

 static const char * const cmd_qgroup_show_usage[] = {
-   "btrfs qgroup show -pcreFf "
-   "[--sort=qgroupid,rfer,excl,max_rfer,max_excl] ",
+   "btrfs qgroup show [options] ",
"Show subvolume quota groups.",
"-p print parent qgroup id",
"-c print child qgroup id",
@@ -288,6 +287,8 @@ static const char * const cmd_qgroup_show_usage[] = {
"   list qgroups sorted by specified items",
"   you can use '+' or '-' in front of each item.",
"   (+:ascending, -:descending, ascending default)",
+   "--sync invoke sync before getting info",
+   "--no-sync  do not invoke sync before getting info (default)",


I see you're using -Y and -N option, it's better also to show
the short option together, if we will use that short option.


NULL
 };

@@ -301,6 +302,7 @@ static int cmd_qgroup_show(int argc, char **argv)
u64 qgroupid;
int filter_flag = 0;
unsigned unit_mode;
+   int sync = 0;

struct btrfs_qgroup_comparer_set *comparer_set;
struct btrfs_qgroup_filter_set *filter_set;
@@ -313,6 +315,8 @@ static int cmd_qgroup_show(int argc, char **argv)
int c;
static const struct option long_options[] = {
{"sort", required_argument, NULL, 'S'},
+   {"sync", no_argument, NULL, 'Y'},
+   {"no-sync", no_argument, NULL, 'N'},


Although I'm not sure if "-Y" and "-N" is proper here.
IMHO, it's more like something to say all "Yes" or "No" to any 
interactive confirmation.


Maybe non-charactor option will be better.

Other part looks good to me.

Thanks,
Qu


{ NULL, 0, NULL, 0 }
};

@@ -348,6 +352,12 @@ static int cmd_qgroup_show(int argc, char **argv)
if (ret)
usage(cmd_qgroup_show_usage);
break;
+   case 'Y':
+   sync = 1;
+   break;
+   case 'N':
+   sync = 0;
+   break;
default:
usage(cmd_qgroup_show_usage);
}
@@ -365,6 +375,16 @@ static int cmd_qgroup_show(int argc, char **argv)
return 1;
}

+   if (sync) {
+   ret = ioctl(fd, BTRFS_IOC_SYNC);
+   if (ret < 0) {
+   error("sync ioctl failed on '%s': %s", path,
+ strerror(errno));
+   close_file_or_dir(fd, dirstream);
+   goto out;
+   }
+   }
+
if (filter_flag) {
ret = lookup_path_rootid(fd, );
if (ret < 0) {




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs-progs: qgroup: add sync option to 'qgroup show'

2016-12-06 Thread Tsutomu Itoh
The 'qgroup show' command does not synchronize filesystem.
Therefore, 'qgroup show' may not display the correct value unless
synchronized with 'filesystem sync' command etc.

So add the '--sync' and '--no-sync' options so that we can choose
whether or not to synchronize when executing the command.

Signed-off-by: Tsutomu Itoh 
---
 Documentation/btrfs-qgroup.asciidoc |  5 +
 cmds-qgroup.c   | 24 ++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/Documentation/btrfs-qgroup.asciidoc 
b/Documentation/btrfs-qgroup.asciidoc
index 438dbc7..dd133ec 100644
--- a/Documentation/btrfs-qgroup.asciidoc
+++ b/Documentation/btrfs-qgroup.asciidoc
@@ -126,6 +126,11 @@ Prefix \'+' means ascending order and \'-' means 
descending order of .
 If no prefix is given, use ascending order by default.
 +
 If multiple s is given, use comma to separate.
++
+--sync
+invoke sync before getting info.
+--no-sync
+do not invoke sync before getting info (default).
 
 EXIT STATUS
 ---
diff --git a/cmds-qgroup.c b/cmds-qgroup.c
index bc15077..ac339f3 100644
--- a/cmds-qgroup.c
+++ b/cmds-qgroup.c
@@ -272,8 +272,7 @@ static int cmd_qgroup_destroy(int argc, char **argv)
 }
 
 static const char * const cmd_qgroup_show_usage[] = {
-   "btrfs qgroup show -pcreFf "
-   "[--sort=qgroupid,rfer,excl,max_rfer,max_excl] ",
+   "btrfs qgroup show [options] ",
"Show subvolume quota groups.",
"-p print parent qgroup id",
"-c print child qgroup id",
@@ -288,6 +287,8 @@ static const char * const cmd_qgroup_show_usage[] = {
"   list qgroups sorted by specified items",
"   you can use '+' or '-' in front of each item.",
"   (+:ascending, -:descending, ascending default)",
+   "--sync invoke sync before getting info",
+   "--no-sync  do not invoke sync before getting info (default)",
NULL
 };
 
@@ -301,6 +302,7 @@ static int cmd_qgroup_show(int argc, char **argv)
u64 qgroupid;
int filter_flag = 0;
unsigned unit_mode;
+   int sync = 0;
 
struct btrfs_qgroup_comparer_set *comparer_set;
struct btrfs_qgroup_filter_set *filter_set;
@@ -313,6 +315,8 @@ static int cmd_qgroup_show(int argc, char **argv)
int c;
static const struct option long_options[] = {
{"sort", required_argument, NULL, 'S'},
+   {"sync", no_argument, NULL, 'Y'},
+   {"no-sync", no_argument, NULL, 'N'},
{ NULL, 0, NULL, 0 }
};
 
@@ -348,6 +352,12 @@ static int cmd_qgroup_show(int argc, char **argv)
if (ret)
usage(cmd_qgroup_show_usage);
break;
+   case 'Y':
+   sync = 1;
+   break;
+   case 'N':
+   sync = 0;
+   break;
default:
usage(cmd_qgroup_show_usage);
}
@@ -365,6 +375,16 @@ static int cmd_qgroup_show(int argc, char **argv)
return 1;
}
 
+   if (sync) {
+   ret = ioctl(fd, BTRFS_IOC_SYNC);
+   if (ret < 0) {
+   error("sync ioctl failed on '%s': %s", path,
+ strerror(errno));
+   close_file_or_dir(fd, dirstream);
+   goto out;
+   }
+   }
+
if (filter_flag) {
ret = lookup_path_rootid(fd, );
if (ret < 0) {
-- 
2.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs-progs: Fix disable backtrace assert error

2016-12-06 Thread Qu Wenruo
Due to commit 00e769d04c2c83029d6c71(btrfs-progs: Correct value printed
by assertions/BUG_ON/WARN_ON), which changed the assert_trace()
parameter, the condition passed to assert/WARN_ON/BUG_ON are logical
notted for backtrace enabled and disabled case.

Such behavior makes us easier to pass value wrong, and in fact it did
cause us to pass wrong condition for ASSERT().

Instead of passing different conditions for ASSERT/WARN_ON/BUG_ON()
manually, this patch will use BUG_ON() to implement the resting
ASSERT/WARN_ON/BUG(), so we don't need to pass 3 different conditions
but only one.

And to further info the review for the fact that the condition should be
different, rename "assert_trace" to "bugon_trace", as unlike assert, we
will only trigger the bug when condition is true.

Also, move WARN_ON() out of the ifdef branch, as it's completely the
same for both branches.

Cc: Goldwyn Rodrigues 
Signed-off-by: Qu Wenruo 
---
 kerncompat.h | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/kerncompat.h b/kerncompat.h
index e374614..be77608 100644
--- a/kerncompat.h
+++ b/kerncompat.h
@@ -277,7 +277,7 @@ static inline long IS_ERR(const void *ptr)
 #define vfree(x) free(x)
 
 #ifndef BTRFS_DISABLE_BACKTRACE
-static inline void assert_trace(const char *assertion, const char *filename,
+static inline void bugon_trace(const char *assertion, const char *filename,
  const char *func, unsigned line, long val)
 {
if (!val)
@@ -287,17 +287,20 @@ static inline void assert_trace(const char *assertion, 
const char *filename,
exit(1);
 }
 
-#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
-#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
-#defineASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__, 
(long)!(c))
-#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 1)
+#define BUG_ON(c) bugon_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
 #else
 #define BUG_ON(c) assert(!(c))
-#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
-#define ASSERT(c) assert(!(c))
-#define BUG() assert(0)
 #endif
 
+#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
+/*
+ * TODO: ASSERT() should be depercated. In case like ASSERT(ret == 0), it
+ * won't output any useful value for ret.
+ * Should be replaced by BUG_ON(ret);
+ */
+#defineASSERT(c) BUG_ON(!(c))
+#define BUG() BUG_ON(1)
+
 #define container_of(ptr, type, member) ({  \
 const typeof( ((type *)0)->member ) *__mptr = (ptr);\
(type *)( (char *)__mptr - offsetof(type,member) );})
-- 
2.10.2



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] btrfs-progs: Correct value printed by assertions/BUG_ON/WARN_ON

2016-12-06 Thread Qu Wenruo



At 12/06/2016 08:44 PM, Goldwyn Rodrigues wrote:



On 12/05/2016 09:08 PM, Qu Wenruo wrote:



At 12/06/2016 10:51 AM, Goldwyn Rodrigues wrote:



On 12/05/2016 08:03 PM, Qu Wenruo wrote:

BTW, the DISABLE_BACKTRACE branch seems quite different from
backtrace one.

#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__,
(long)(c))
#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__,
(long)(c))
#define ASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__,
(long)!(c))
#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 1)
#else
#define BUG_ON(c) assert(!(c))
#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__,
(long)(c))
#define ASSERT(c) assert(!(c))
#define BUG() assert(0)

Condition of BUG_ON/ASSERT/BUG are all logical notted for
DISABLE_BACKTRACE.
While WARN_ON() of both branch are the same condition.


WARN_ON is using warning_trace as opposed to assert, and that is the
reason it is not notted.



This seems quite confusing to me.

Any idea to make it more straightforward?



I just kept it the same as before. warning_trace was using an extra
variable, trace, which was not needed because the print_trace was
already in ifndefs.


I mean, better make the condition the same for both BUG/BUG_ON/ASSERT.
So that we don't need to manually logical not the condition.


First of all, ASSERT and BUG_ON have opposite meanings. ASSERT() checks
if the condition is true and continues (halts if false). BUG_ON() "bugs"
if condition is true and halts (continues if false). So you would have
to use opposite conditions.


I know, I mean, for both backtrace disabled and enabled case, the 
condition should be the same.


If not the same condition, it means assert_trace() has different meaning 
than original assert.






For example:
#define ASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__,(long)
(c))
and
#define ASSERT(c) assert((c))

This looks much more straightforward, and easier to expose bug at review
time.


Could you explain with a patch? You idea seems to add more code than
reduce it.


Sure, will submit one soon.

Thanks,
Qu


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: bio linked list corruption.

2016-12-06 Thread Linus Torvalds
On Tue, Dec 6, 2016 at 12:16 AM, Peter Zijlstra  wrote:
>>
>> Of course, I'm really hoping that this shmem.c use is the _only_ such
>> case.  But I doubt it.
>
> $ git grep DECLARE_WAIT_QUEUE_HEAD_ONSTACK | wc -l
> 28

Hmm. Most of them seem to be ok, because they use "wait_event()",
which will always remove itself from the wait-queue. And they do it
from the place that allocates the wait-queue.

IOW, the mm/shmem.c case really was fairly special, because it just
did "prepare_to_wait()", and then did a finish_wait() - and not in the
thread that allocated it on the stack.

So it's really that "some _other_ thread allocated the waitqueue on
the stack, and now we're doing a wait on it" that is bad.

So the normal pattern seems to be:

 - allocate wq on the stack
 - pass it on to a waker
 - wait for it

and that's ok, because as part of "wait for it" we will also be
cleaning things up.

The reason mm/shmem.c was buggy was that it did

 - allocate wq on the stack
 - pass it on to somebody else to wait for
 - wake it up

and *that* is buggy, because it's the waiter, not the waker, that
normally cleans things up.

Is there some good way to find this kind of pattern automatically, I wonder

 Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: crc32c_le performance hit

2016-12-06 Thread Chris Mason

On 12/05/2016 08:34 PM, Chris Murphy wrote:

On Mon, Dec 5, 2016 at 8:46 AM, Chris Mason  wrote:

On 12/04/2016 04:28 PM, Chris Murphy wrote:


4.8.11-300.fc25.x86_64

I'm currently doing a btrfs send/receive and I'm seeing a rather large
hit for crc32c, bigger than aes-ni (the volume is on dm crypt), using
perf top.

  14.03%  btrfs[.] __crc32c_le
  10.50%  [kernel] [k] _aesni_enc4



This is surprising, although send/recv does do a lot of small crc runs. What
is the overall CPU usage?


Before issuing btrfs send, btrfs receive:
%Cpu(s):  0.2 us,  0.2 sy,  0.0 ni, 99.5 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st

During:
%Cpu(s):  5.1 us, 18.2 sy,  0.0 ni, 35.0 id, 40.2 wa,  0.7 hi,  0.8 si,  0.0 st

Full output
https://urldefense.proofpoint.com/v2/url?u=https-3A__paste.fedoraproject.org_500097_=DgIBaQ=5VD0RTtNlTh3ycd41b3MUw=9QPtTAxcitoznaWRKKHoEQ=svwboLaftKw8i8TJ7J-VEs1UtIBPsaEw6-H1ijzbrxQ=t64ReJkaNbpHAPdqzPcXS7rLlACNHhBOt2uJ8-fYjfU=

Lots of waiting, but most of that 18% hit is coming from the two btrfs
commands themselves. The same thing doesn't happen with a btrfs scrub,
which also has to compute crc's albeit just once.


Maybe pin btrfs to a single CPU and use mpstat to
see how hot that one CPU is.  If we're 14% of a CPU running at 100%, that's
a big deal.  If we're 14% of a CPU running at 5%, we safely ignore it.


I'm not sure how to do this, but if all btrfs processes were pinned to
a single core, based on the more than dozen active processes I see in
top during send/receive, it would definitely soak all of that core.


So what should be happening is the btrfs command triggering ioctls that 
assemble the send stream.  If we just pin the btrfs command, I'm hoping 
that will account for most of the crc32c cpu time.  We definitely do 
crcs as we read in data/metadata, but those won't be the teeny 
incrementals that send/recv causes.


Long way of saying that if we use "taskset -c 0 btrfs send", it should 
isolate the send/recv overhead from the normal FS.  We do love our 
helper threads, but it's part of the picture at least.


-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [SOLVED] Re: system hangs due to qgroups

2016-12-06 Thread Marc Joliet
On Tuesday 06 December 2016 11:12:12 Marc Joliet wrote:
> I have disabled quotas already (first thing I did after
> mounting).  However,  there were definitely 20-30, maybe more (enough for
> 2, maybe 3, console pages -- I don't know how many lines the initramfs
> rescue shell has, but based on that, you could estimate the number of
> qgroups).

Of course, you can probably check the sanitized images I posted for more 
information.

-- 
Marc Joliet
--
"People who think they know everything really annoy those of us who know we
don't" - Bjarne Stroustrup


signature.asc
Description: This is a digitally signed message part.


Re: [PATCH 10/18] btrfs: root->fs_info cleanup, btrfs_calc_{trans,trunc}_metadata_size

2016-12-06 Thread David Sterba
On Mon, Dec 05, 2016 at 10:50:34AM -0500, Jeff Mahoney wrote:
> On 12/5/16 10:29 AM, David Sterba wrote:
> > On Fri, Dec 02, 2016 at 12:07:30AM -0500, je...@suse.com wrote:
> >> -static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
> >> +static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info 
> >> *fs_info,
> >> unsigned num_items)
> >>  {
> >> -  return root->fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
> >> +  return fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
> > 
> > Is there a missing patch that moves 'nodesize' to fs_info? The patch has
> > a minor conflict in the original line where it's just 'root->nodesize',
> > but thre are many compilation faiures due to lack of fs_info::nodesize.
> 
> Yeah, it looks like the list dropped it.  It shows up in the thread
> posted to me.
> 
> I've pushed the series as the for-4.10/misc-4.10 branch in
> my repo at git://git.kernel.org/pub/scm/linux/kernel/git/jeffm/linux-btrfs.git
> 
> It also contains the fix Omar suggested in his review.

Thanks, added to 4.10 queue.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] btrfs: opencode chunk locking, remove helpers

2016-12-06 Thread David Sterba
The helpers are trivial and we don't use them consistently.

Signed-off-by: David Sterba 
---

Refreshed on top of Jeff's cleanup regarding passing fs_info and root to
various functions.

 fs/btrfs/disk-io.c  |  4 +--
 fs/btrfs/extent-tree.c  |  8 +++---
 fs/btrfs/free-space-cache.c |  4 +--
 fs/btrfs/volumes.c  | 70 ++---
 fs/btrfs/volumes.h  | 10 ---
 5 files changed, 43 insertions(+), 53 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 196f1aafcea9..848d5e1c0585 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4003,7 +4003,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
__btrfs_free_block_rsv(root->orphan_block_rsv);
root->orphan_block_rsv = NULL;
 
-   lock_chunks(fs_info);
+   mutex_lock(_info->chunk_mutex);
while (!list_empty(_info->pinned_chunks)) {
struct extent_map *em;
 
@@ -4012,7 +4012,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
list_del_init(>list);
free_extent_map(em);
}
-   unlock_chunks(fs_info);
+   mutex_unlock(_info->chunk_mutex);
 }
 
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2825019cd18f..e97302f437a1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9419,9 +9419,9 @@ int btrfs_inc_block_group_ro(struct btrfs_root *root,
 out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = update_block_group_flags(fs_info, cache->flags);
-   lock_chunks(fs_info);
+   mutex_lock(_info->chunk_mutex);
check_system_chunk(trans, fs_info, alloc_flags);
-   unlock_chunks(fs_info);
+   mutex_unlock(_info->chunk_mutex);
}
mutex_unlock(_info->ro_block_group_mutex);
 
@@ -10458,7 +10458,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle 
*trans,
 
memcpy(, _group->key, sizeof(key));
 
-   lock_chunks(fs_info);
+   mutex_lock(_info->chunk_mutex);
if (!list_empty(>list)) {
/* We're in the transaction->pending_chunks list. */
free_extent_map(em);
@@ -10526,7 +10526,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle 
*trans,
free_extent_map(em);
}
 
-   unlock_chunks(fs_info);
+   mutex_unlock(_info->chunk_mutex);
 
ret = remove_block_group_free_space(trans, fs_info, block_group);
if (ret)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 2e8445e4ffa3..7015892c9ee8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3352,7 +3352,7 @@ void btrfs_put_block_group_trimming(struct 
btrfs_block_group_cache *block_group)
spin_unlock(_group->lock);
 
if (cleanup) {
-   lock_chunks(fs_info);
+   mutex_lock(_info->chunk_mutex);
em_tree = _info->mapping_tree.map_tree;
write_lock(_tree->lock);
em = lookup_extent_mapping(em_tree, block_group->key.objectid,
@@ -3364,7 +3364,7 @@ void btrfs_put_block_group_trimming(struct 
btrfs_block_group_cache *block_group)
 */
remove_extent_mapping(em_tree, em);
write_unlock(_tree->lock);
-   unlock_chunks(fs_info);
+   mutex_unlock(_info->chunk_mutex);
 
/* once for us and once for the tree */
free_extent_map(em);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cf3838604987..bb8592e1a364 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1890,10 +1890,10 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, char 
*device_path, u64 devid)
}
 
if (device->writeable) {
-   lock_chunks(fs_info);
+   mutex_lock(_info->chunk_mutex);
list_del_init(>dev_alloc_list);
device->fs_devices->rw_devices--;
-   unlock_chunks(fs_info);
+   mutex_unlock(_info->chunk_mutex);
clear_super = true;
}
 
@@ -1982,11 +1982,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, char 
*device_path, u64 devid)
 
 error_undo:
if (device->writeable) {
-   lock_chunks(fs_info);
+   mutex_lock(_info->chunk_mutex);
list_add(>dev_alloc_list,
 _info->fs_devices->alloc_list);
device->fs_devices->rw_devices++;
-   unlock_chunks(fs_info);
+   mutex_unlock(_info->chunk_mutex);
}
goto out;
 }
@@ -2211,9 +2211,9 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info 
*fs_info)
list_for_each_entry(device, _devices->devices, dev_list)
device->fs_devices = seed_devices;
 
-   lock_chunks(fs_info);
+   

Re: Convert from RAID 5 to 10

2016-12-06 Thread Florian Lindner
Thanks a lot to all who replied! I learned a lot from this thread.

However, what I learned has made me even more doubtful that a btrfs RAID is the 
right choice for me at this moment.
There seems to be much uncertainty about the real state (experimental, stable, 
production-ready, mature, ...) of btrfs'
raid implementation, even on this very well informed lists.

I really want to have the checksumming and auto-repair feature of btrfs. That 
was the original reason why I did't go
with a dmraid in the first place. So there are basically two options left, 
btrfs, with a raid 10 or zfs with some raid
10 or 5 equivalent.

zfs seems to be nice, mature solution, but I also prefer to use something 
native to Linux.

Best Regards,
Florian


Am 29.11.2016 um 18:20 schrieb Florian Lindner:
> Hello,
> 
> I have 4 harddisks with 3TB capacity each. They are all used in a btrfs RAID 
> 5. It has come to my attention, that there
> seem to be major flaws in btrfs' raid 5 implementation. Because of that, I 
> want to convert the the raid 5 to a raid 10
> and I have several questions.
> 
> * Is that possible as an online conversion?
> 
> * Since my effective capacity will shrink during conversions, does btrfs 
> check if there is enough free capacity to
> convert? As you see below, right now it's probably too full, but I'm going to 
> delete some stuff.
> 
> * I understand the command to convert is
> 
> btrfs balance start -dconvert=raid10 -mconvert=raid10 /mnt
> 
> Correct?
> 
> * What disks are allowed to fail? My understanding of a raid 10 is like that
> 
> disks = {a, b, c, d}
> 
> raid0( raid1(a, b), raid1(c, d) )
> 
> This way (a XOR b) AND (c XOR d) are allowed to fail without the raid to fail 
> (either a or b and c or d are allowed to fail)
> 
> How is that with a btrfs raid 10?
> 
> * Any other advice? ;-)
> 
> Thanks a lot,
> 
> Florian
> 
> 
> Some information of my filesystem:
> 
> # btrfs filesystem show /
> Label: 'data'  uuid: 57e5b9e9-01ae-4f9e-8a3d-9f42204d7005
> Total devices 4 FS bytes used 7.57TiB
> devid1 size 2.72TiB used 2.72TiB path /dev/sda4
> devid2 size 2.72TiB used 2.72TiB path /dev/sdb4
> devid3 size 2.72TiB used 2.72TiB path /dev/sdc4
> devid4 size 2.72TiB used 2.72TiB path /dev/sdd4
> 
> # btrfs filesystem df /
> Data, RAID5: total=8.14TiB, used=7.56TiB
> System, RAID5: total=96.00MiB, used=592.00KiB
> Metadata, RAID5: total=12.84GiB, used=11.06GiB
> GlobalReserve, single: total=512.00MiB, used=0.00B
> 
> # df -h
> Filesystem  Size  Used Avail Use% Mounted on
> 
> /dev/sda411T  7.6T  597G  93% /
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Free space missing

2016-12-06 Thread Mike Fleetwood
On 6 December 2016 at 12:40, Кравцов Роман Владимирович
 wrote:
> Hello.
>
> Why 'BTRFS used size' and 'du -s' are different?
>
>
> [root@OraCI2 ~]# btrfs --version
> btrfs-progs v4.8.3
> [root@OraCI2 ~]# btrfs fi show /dev/nvme0n1
> Label: 'OLD_PES'  uuid: bd542f37-6baa-4af8-b87a-20d4e335e4b3
> Total devices 1 FS bytes used 2.53TiB
> devid1 size 2.91TiB used 2.90TiB path /dev/nvme0n1
> [root@OraCI2 ~]# btrfs fi usage /mnt/OLD/
> Overall:
> Device size:   2.91TiB
> Device allocated:   2.90TiB
> Device unallocated:   9.97GiB
> Device missing: 0.00B
> Used:   2.53TiB
> Free (estimated): 385.31GiB(min: 385.31GiB)
> Data ratio:  1.00
> Metadata ratio:  1.00
> Global reserve: 512.00MiB(used: 0.00B)
>
> Data,single: Size:2.89TiB, Used:2.53TiB <-
>/dev/nvme0n1   2.89TiB
>
> Metadata,single: Size:6.01GiB, Used:3.52GiB
>/dev/nvme0n1   6.01GiB
>
> System,single: Size:32.00MiB, Used:432.00KiB
>/dev/nvme0n1  32.00MiB
>
> Unallocated:
>/dev/nvme0n1   9.97GiB
>
> [root@OraCI2 ~]# df -h | grep nvme0n1
> /dev/nvme0n1  3,0T 2,6T 386G
> 88% /mnt/OLD
> [root@OraCI2 ~]# du -sh /mnt/OLD/
> 2,0T/mnt/OLD/<-
> [root@OraCI2 ~]#
> /dev/nvme0n1 on /mnt/OLD type btrfs
> (rw,relatime,ssd,discard,space_cache,subvolid=5,subvol=/)
>
> WBR,
> Roman Kravtsov

These entries in the FAQ should help
https://btrfs.wiki.kernel.org/index.php/FAQ#How_much_free_space_do_I_have.3F

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


*** Some devices missing ***

2016-12-06 Thread Luescher Claude

Hi,

I have the following array:

Label: 'backup_fs'  uuid: x
Total devices 10 FS bytes used 13.46TiB
devid1 size 3.64TiB used 3.32TiB path /dev/sdf
devid2 size 3.64TiB used 3.32TiB path /dev/sde
devid3 size 3.64TiB used 3.32TiB path /dev/sdd
devid4 size 3.64TiB used 3.32TiB path /dev/sdg
devid5 size 3.64TiB used 3.33TiB path /dev/sdh
devid6 size 3.64TiB used 3.33TiB path /dev/sdi
devid7 size 3.64TiB used 3.33TiB path /dev/sdc
devid8 size 5.46TiB used 1.82TiB path /dev/sdb
devid9 size 5.46TiB used 1.82TiB path /dev/sdj
*** Some devices missing

The server was running 3.13 kernel and btrfs progs 4.0  until it 
crashed. After that mounting the file system hang forever so I decided 
to switch to Linux Kernel 4.8.11 and btrfs-progs v4.8.5.


There are of course no missing devices.  This is what I get in dmesg 
when I try to mount the array with or without the degraded option:


[  362.906958] BTRFS info (device sdf): disk space caching is enabled
[  363.066379] BTRFS error (device sdf): super_num_devices 10 mismatch 
with num_devices 9 found here

[  363.067095] BTRFS error (device sdf): failed to read chunk tree: -22
[  363.086089] BTRFS: open_ctree failed
[  438.747338] BTRFS info (device sdf): allowing degraded mounts
[  438.747341] BTRFS info (device sdf): disk space caching is enabled
[  438.993190] BTRFS error (device sdf): super_num_devices 10 mismatch 
with num_devices 9 found here

[  438.993914] BTRFS error (device sdf): failed to read chunk tree: -22
[  439.013552] BTRFS: open_ctree failed


I only find an old 2014 list entry that it was some bug and fixed so it 
should be long time fixed in 4.8.5. I need to put the device back to 
operation as soon as possible. Anybody have any further ideas what can I 
do?


I running btrfsck on the array and at least that works, but I doubt that 
it will do anything useful.


Thanks
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Free space missing

2016-12-06 Thread Кравцов Роман Владимирович

Hello.

Why 'BTRFS used size' and 'du -s' are different?


[root@OraCI2 ~]# btrfs --version
btrfs-progs v4.8.3
[root@OraCI2 ~]# btrfs fi show /dev/nvme0n1
Label: 'OLD_PES'  uuid: bd542f37-6baa-4af8-b87a-20d4e335e4b3
Total devices 1 FS bytes used 2.53TiB
devid1 size 2.91TiB used 2.90TiB path /dev/nvme0n1
[root@OraCI2 ~]# btrfs fi usage /mnt/OLD/
Overall:
Device size:   2.91TiB
Device allocated:   2.90TiB
Device unallocated:   9.97GiB
Device missing: 0.00B
Used:   2.53TiB
Free (estimated): 385.31GiB(min: 385.31GiB)
Data ratio:  1.00
Metadata ratio:  1.00
Global reserve: 512.00MiB(used: 0.00B)

Data,single: Size:2.89TiB, Used:2.53TiB <-
   /dev/nvme0n1   2.89TiB

Metadata,single: Size:6.01GiB, Used:3.52GiB
   /dev/nvme0n1   6.01GiB

System,single: Size:32.00MiB, Used:432.00KiB
   /dev/nvme0n1  32.00MiB

Unallocated:
   /dev/nvme0n1   9.97GiB

[root@OraCI2 ~]# df -h | grep nvme0n1
/dev/nvme0n1  3,0T 2,6T 
386G   88% /mnt/OLD

[root@OraCI2 ~]# du -sh /mnt/OLD/
2,0T/mnt/OLD/<-
[root@OraCI2 ~]#
/dev/nvme0n1 on /mnt/OLD type btrfs 
(rw,relatime,ssd,discard,space_cache,subvolid=5,subvol=/)


WBR,
Roman Kravtsov

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] btrfs-progs: Correct value printed by assertions/BUG_ON/WARN_ON

2016-12-06 Thread Goldwyn Rodrigues


On 12/05/2016 09:08 PM, Qu Wenruo wrote:
> 
> 
> At 12/06/2016 10:51 AM, Goldwyn Rodrigues wrote:
>>
>>
>> On 12/05/2016 08:03 PM, Qu Wenruo wrote:
>>> BTW, the DISABLE_BACKTRACE branch seems quite different from
>>> backtrace one.
>>>
>>> #define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__,
>>> (long)(c))
>>> #define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__,
>>> (long)(c))
>>> #define ASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__,
>>> (long)!(c))
>>> #define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 1)
>>> #else
>>> #define BUG_ON(c) assert(!(c))
>>> #define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__,
>>> (long)(c))
>>> #define ASSERT(c) assert(!(c))
>>> #define BUG() assert(0)
>>>
>>> Condition of BUG_ON/ASSERT/BUG are all logical notted for
>>> DISABLE_BACKTRACE.
>>> While WARN_ON() of both branch are the same condition.
>>
>> WARN_ON is using warning_trace as opposed to assert, and that is the
>> reason it is not notted.
>>
>>>
>>> This seems quite confusing to me.
>>>
>>> Any idea to make it more straightforward?
>>>
>>
>> I just kept it the same as before. warning_trace was using an extra
>> variable, trace, which was not needed because the print_trace was
>> already in ifndefs.
> 
> I mean, better make the condition the same for both BUG/BUG_ON/ASSERT.
> So that we don't need to manually logical not the condition.

First of all, ASSERT and BUG_ON have opposite meanings. ASSERT() checks
if the condition is true and continues (halts if false). BUG_ON() "bugs"
if condition is true and halts (continues if false). So you would have
to use opposite conditions.

> 
> For example:
> #define ASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__,(long)
> (c))
> and
> #define ASSERT(c) assert((c))
> 
> This looks much more straightforward, and easier to expose bug at review
> time.

Could you explain with a patch? You idea seems to add more code than
reduce it.


-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [SOLVED] Re: system hangs due to qgroups

2016-12-06 Thread Marc Joliet
On Tuesday 06 December 2016 08:29:48 Qu Wenruo wrote:
> At 12/05/2016 10:43 PM, Marc Joliet wrote:
> > On Monday 05 December 2016 12:01:28 Marc Joliet wrote:
> >>> This seems to be a NULL pointer bug in qgroup relocation fix.
> >>> 
> >>> 
> >>> 
> >>> The latest fix (not merged yet) should address it.
> >>> 
> >>> 
> >>> 
> >>> You could try the for-next-20161125 branch from David to fix it:
> >>> https://github.com/kdave/btrfs-devel/tree/for-next-20161125
> >> 
> >> OK, I'll try that, thanks!  I just have to wait for it to finish
> >> cloning...
> > 
> > [...]
> > 
> >>> And for your recovery, I'd suggest to install an Archlinux into a USB
> >>> HDD or USB stick, and compile David's branch and install it into the USB
> >>> HDD.
> >>> 
> >>> 
> >>> 
> >>> Then use the USB storage as rescue tool to mount the fs, which should do
> >>> RW mount with or without skip_balance mount option.
> >>> So you could disable quota then.
> >> 
> >> OK, I'll try that, thanks!
> > 
> > Excellent, thank you, that worked!  My laptop is working normally again. 
> > I'll keep an eye on it, but so far two balance operations ran normally
> > (that is, they completed within a few minutes and without hanging the
> > system).
> > 
> > (Specifically, since I didn't find out how to get a different kernel onto
> > the Arch USB stick, I simply installed the kernel on my desktop, then did
> > everything from an initramfs emergency shell, then moved the SSD back
> > into the laptop.)
> > 
> > Thanks, everyone!
> 
> Glad that helped.
> 
> I just forgot that you're using gentoo, not archlinux, and kernel
> install script won't work for archlinux.
> 
> Anyway, I'm glad that works for you.
> 
> BTW, if you haven't yet disable quota, would you please give a report on
> how many qgroup you have?

I have disabled quotas already (first thing I did after mounting).  However, 
there were definitely 20-30, maybe more (enough for 2, maybe 3, console pages 
-- I don't know how many lines the initramfs rescue shell has, but based on 
that, you could estimate the number of qgroups).

> And how CPU is spinning for balancing with quota enabled?

All I can say is, based on past observations, that I would see a single 
process (usually btrfs-transaction, but often a user-space process, such as 
baloo_file_extractor) using a single CPU at 100% and blocking (almost) 
everything else, and either finish after a while if it was quick enough, or 
there would be intermittent time frames where other processes weren't blocked.  
With balancing the behaviour was the latter, only it was the btrfs process 
using 100% CPU.  Furthermore, metadata balances were worse than data balances.

> This would help us to evaluate how qgroup slows down the process if
> there are too many snapshots.

Again, sorry that I was so quick to disable quotas, but I was only willing to 
do so much debugging with this laptop.

> Thanks,
> Qu

Greetings
-- 
Marc Joliet
--
"People who think they know everything really annoy those of us who know we
don't" - Bjarne Stroustrup


signature.asc
Description: This is a digitally signed message part.


Re: bio linked list corruption.

2016-12-06 Thread Vegard Nossum
On 5 December 2016 at 22:33, Vegard Nossum  wrote:
> On 5 December 2016 at 21:35, Linus Torvalds
>  wrote:
>> Note for Ingo and Peter: this patch has not been tested at all. But
>> Vegard did test an earlier patch of mine that just verified that yes,
>> the issue really was that wait queue entries remained on the wait
>> queue head just as we were about to return and free it.
>
> The second patch has been running for 1h+ without any problems of any
> kind. I should typically have seen 2 crashes by now. I'll let it run
> overnight to be sure.

Alright, so nearly 12 hours later I don't see either the new warning
or the original crash at all, so feel free to add:

Tested-by: Vegard Nossum .

That said, my 8 VMs had all panicked in some way due to OOMs (which is
new since v4.8), although some got page allocation stalls for >20s and
died because "khugepaged blocked for more than 120 seconds", others
got "Out of memory and no killable processes".


Vegard
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: bio linked list corruption.

2016-12-06 Thread Ingo Molnar

* Peter Zijlstra  wrote:

> $ git grep DECLARE_WAIT_QUEUE_HEAD_ONSTACK | wc -l
> 28

This debug facility looks sensible. A couple of minor suggestions:

> --- a/include/linux/wait.h
> +++ b/include/linux/wait.h
> @@ -39,6 +39,9 @@ struct wait_bit_queue {
>  struct __wait_queue_head {
>   spinlock_t  lock;
>   struct list_headtask_list;
> +#ifdef CONFIG_DEBUG_WAITQUEUE
> + int onstack;
> +#endif

The structure will pack better in the debug-enabled case if 'onstack' is next 
to 
'lock', as spinlock_t is 4 bytes on many architectures.

> -#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {
> \
> +#ifdef CONFIG_DEBUG_WAITQUEUE
> +#define ___WAIT_QUEUE_ONSTACK(onstack)   .onstack = (onstack),
> +#else
> +#define ___WAIT_QUEUE_ONSTACK(onstack)
> +#endif

Please help readers by showing the internal structure of the definition:

#ifdef CONFIG_DEBUG_WAITQUEUE
# define ___WAIT_QUEUE_ONSTACK(onstack) .onstack = (onstack),
#else
# define ___WAIT_QUEUE_ONSTACK(onstack)
#endif


> +static inline void prepare_debug(wait_queue_head_t *q, wait_queue_t *wait)
> +{
> +#ifdef CONFIG_DEBUG_WAITQUEUE
> + WARN_ON_ONCE(q->onstack && wait->func == autoremove_wake_function)
> +#endif
> +}

I'd name this debug_waitqueue_check() or such - as the 'prepare' is a bit 
misleadig (we don't prepare debugging, we do the debug check here).

> +config DEBUG_WAITQUEUE
> + bool "Debug waitqueue"
> + depends on DEBUG_KERNEL

I'd name it DEBUG_SCHED_WAITQUEUE=y and I'd also make it depend on 
CONFIG_DEBUG_SCHED.

LGTM otherwise!

Thanks,

Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] generic/35[67]: disable swapfile tests on Btrfs

2016-12-06 Thread Darrick J. Wong
On Mon, Dec 05, 2016 at 05:01:28PM -0800, Omar Sandoval wrote:
> From: Omar Sandoval 
> 
> Btrfs doesn't support swapfiles (yet?), so generic/356 fails
> erroneously, and generic/357 only passes by accident. Let's add a
> _require_scratch_swapfile helper and add it to these tests.

Hehe, good catch. :)
Reviewed-by: Darrick J. Wong 

--D

> 
> Signed-off-by: Omar Sandoval 
> ---
> I have some code enabling swapfiles for Btrfs [1], but there's some ABBA
> deadlock issues with i_rwsem and mmap_sem on swap-over-NFS that I
> haven't had time to sort out. In the meantime, let's just skip these
> tests.
> 
> 1: https://github.com/osandov/linux/tree/btrfs-swap
> 
>  common/rc | 22 ++
>  tests/generic/356 |  1 +
>  tests/generic/357 |  1 +
>  3 files changed, 24 insertions(+)
> 
> diff --git a/common/rc b/common/rc
> index 2719b23..d863e56 100644
> --- a/common/rc
> +++ b/common/rc
> @@ -1790,6 +1790,28 @@ _require_odirect()
>   rm -f $testfile 2>&1 > /dev/null
>  }
>  
> +# Check that the filesystem supports swapfiles
> +_require_scratch_swapfile()
> +{
> + _require_scratch
> +
> + _scratch_mkfs >/dev/null
> + _scratch_mount
> +
> + # Minimum size for mkswap is 10 pages
> + local size=$(($(get_page_size) * 10))
> +
> + _pwrite_byte 0x61 0 "$size" "$SCRATCH_MNT/swap" >/dev/null 2>&1
> + mkswap "$SCRATCH_MNT/swap" >/dev/null 2>&1
> + if ! swapon "$SCRATCH_MNT/swap" >/dev/null 2>&1; then
> + _scratch_unmount
> + _notrun "swapfiles are not supported"
> + fi
> +
> + swapoff "$SCRATCH_MNT/swap" >/dev/null 2>&1
> + _scratch_unmount
> +}
> +
>  # Check that a fs has enough free space (in 1024b blocks)
>  #
>  _require_fs_space()
> diff --git a/tests/generic/356 b/tests/generic/356
> index 6bb90c0..51eeb65 100755
> --- a/tests/generic/356
> +++ b/tests/generic/356
> @@ -44,6 +44,7 @@ _cleanup()
>  
>  # real QA test starts here
>  _supported_os Linux
> +_require_scratch_swapfile
>  _require_scratch_reflink
>  _require_cp_reflink
>  
> diff --git a/tests/generic/357 b/tests/generic/357
> index 439b314..0dd0c10 100755
> --- a/tests/generic/357
> +++ b/tests/generic/357
> @@ -44,6 +44,7 @@ _cleanup()
>  
>  # real QA test starts here
>  _supported_os Linux
> +_require_scratch_swapfile
>  _require_scratch_reflink
>  _require_cp_reflink
>  
> -- 
> 2.10.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: bio linked list corruption.

2016-12-06 Thread Peter Zijlstra
On Mon, Dec 05, 2016 at 12:35:52PM -0800, Linus Torvalds wrote:
> Adding the scheduler people to the participants list, and re-attaching
> the patch, because while this patch is internal to the VM code, the
> issue itself is not.
> 
> There might well be other cases where somebody goes "wake_up_all()"
> will wake everybody up, so I can put the wait queue head on the stack,
> and then after I've woken people up I can return".
> 
> Ingo/PeterZ: the reason that does *not* work is that "wake_up_all()"
> does make sure that everybody is woken up, but the usual autoremove
> wake function only removes the wakeup entry if the process was woken
> up by that particular wakeup. If something else had woken it up, the
> entry remains on the list, and the waker in this case returned with
> the wait head still containing entries.
> 
> Which is deadly when the wait queue head is on the stack.

Yes, very much so.

> So I'm wondering if we should make that "synchronous_wake_function()"
> available generally, and maybe introduce a DEFINE_WAIT_SYNC() helper
> that uses it.

We could also do some debug code that tracks the ONSTACK ness and warns
on autoremove. Something like the below, equally untested.

> 
> Of course, I'm really hoping that this shmem.c use is the _only_ such
> case.  But I doubt it.

$ git grep DECLARE_WAIT_QUEUE_HEAD_ONSTACK | wc -l
28

---


diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2408e8d5c05c..199faaa89847 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -39,6 +39,9 @@ struct wait_bit_queue {
 struct __wait_queue_head {
spinlock_t  lock;
struct list_headtask_list;
+#ifdef CONFIG_DEBUG_WAITQUEUE
+   int onstack;
+#endif
 };
 typedef struct __wait_queue_head wait_queue_head_t;
 
@@ -56,9 +59,18 @@ struct task_struct;
 #define DECLARE_WAITQUEUE(name, tsk)   \
wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
 
-#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {  \
+#ifdef CONFIG_DEBUG_WAITQUEUE
+#define ___WAIT_QUEUE_ONSTACK(onstack) .onstack = (onstack),
+#else
+#define ___WAIT_QUEUE_ONSTACK(onstack)
+#endif
+
+#define ___WAIT_QUEUE_HEAD_INITIALIZER(name, onstack)  {   \
.lock   = __SPIN_LOCK_UNLOCKED(name.lock),  \
-   .task_list  = { &(name).task_list, &(name).task_list } }
+   .task_list  = { &(name).task_list, &(name).task_list }, \
+   ___WAIT_QUEUE_ONSTACK(onstack) }
+
+#define __WAIT_QUEUE_HEAD_INITIALIZER(name) 
___WAIT_QUEUE_HEAD_INITIALIZER(name, 0)
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
@@ -80,11 +92,12 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, 
const char *name, struct
 
 #ifdef CONFIG_LOCKDEP
 # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
-   ({ init_waitqueue_head(); name; })
+   ({ init_waitqueue_head(); (name).onstack = 1; name; })
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
 #else
-# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
+# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
+   wait_queue_head_t name = ___WAIT_QUEUE_HEAD_INITIALIZER(name, 1)
 #endif
 
 static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 9453efe9b25a..746d00117d08 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -156,6 +156,13 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int 
mode, int nr_exclusive)
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
 
+static inline void prepare_debug(wait_queue_head_t *q, wait_queue_t *wait)
+{
+#ifdef CONFIG_DEBUG_WAITQUEUE
+   WARN_ON_ONCE(q->onstack && wait->func == autoremove_wake_function)
+#endif
+}
+
 /*
  * Note: we use "set_current_state()" _after_ the wait-queue add,
  * because we need a memory barrier there on SMP, so that any
@@ -178,6 +185,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, 
int state)
if (list_empty(>task_list))
__add_wait_queue(q, wait);
set_current_state(state);
+   prepare_debug(q, wait);
spin_unlock_irqrestore(>lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
@@ -192,6 +200,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, 
wait_queue_t *wait, int state)
if (list_empty(>task_list))
__add_wait_queue_tail(q, wait);
set_current_state(state);
+   prepare_debug(q, wait);
spin_unlock_irqrestore(>lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
@@ -235,6 +244,7 @@ long prepare_to_wait_event(wait_queue_head_t *q, 
wait_queue_t *wait, int state)
}
set_current_state(state);
}
+   prepare_debug(q, wait);
spin_unlock_irqrestore(>lock,