Re: [PATCH] btrfs-progs: fix gcc8 default build warning caused by '-Wformat-truncation'

2018-10-25 Thread Qu Wenruo


On 2018/10/26 上午9:58, Su Yanjun wrote:
> When using gcc8 compiles utils.c, it complains as below:
> 
> utils.c:852:45: warning: '%s' directive output may be truncated writing
> up to 4095 bytes into a region of size 4084 [-Wformat-truncation=]
>snprintf(path, sizeof(path), "/dev/mapper/%s", name);
>  ^~   

As long as we're using snprintf(), we expect some string to be
truncated, so it's not a problem at all.

> In file included from /usr/include/stdio.h:873,
>  from utils.c:20:
> /usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk'
> output between 13 and 4108 bytes into a destination of size 4096
>return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
>   ^~~~
> __bos (__s), __fmt, __va_arg_pack ());
> ~
> 
> This isn't a type of warning we care about, particularly when PATH_MAX
> is much less than either.
> 
> Using the GCC option -Wno-format-truncation to disable this for default
> build.
> 
> Signed-off-by: Su Yanjun 

Reviewed-by: Qu Wenruo 

Thanks,
Qu

> ---
>  Makefile.extrawarn | 1 +
>  configure.ac   | 2 +-
>  2 files changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/Makefile.extrawarn b/Makefile.extrawarn
> index 1f4bda94a167..ed76fb5b5554 100644
> --- a/Makefile.extrawarn
> +++ b/Makefile.extrawarn
> @@ -47,6 +47,7 @@ warning-1 += -Wold-style-definition
>  warning-1 += $(call cc-option, -Wmissing-include-dirs)
>  warning-1 += $(call cc-option, -Wunused-but-set-variable)
>  warning-1 += $(call cc-disable-warning, missing-field-initializers)
> +warning-1 += $(call cc-option, -Wformat-truncation)
>  
>  warning-2 := -Waggregate-return
>  warning-2 += -Wcast-align
> diff --git a/configure.ac b/configure.ac
> index df02f20655d9..c626beca8b77 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -12,7 +12,7 @@ LIBBTRFS_MAJOR=0
>  LIBBTRFS_MINOR=1
>  LIBBTRFS_PATCHLEVEL=2
>  
> -CFLAGS=${CFLAGS:-"-g -O1 -Wall -D_FORTIFY_SOURCE=2"}
> +CFLAGS=${CFLAGS:-"-g -O1 -Wall -D_FORTIFY_SOURCE=2 -Wno-format-truncation"}
>  AC_SUBST([CFLAGS])
>  
>  AC_PREREQ([2.60])
> 



signature.asc
Description: OpenPGP digital signature


[PATCH] btrfs-progs: fix gcc8 default build warning caused by '-Wformat-truncation'

2018-10-25 Thread Su Yanjun
When using gcc8 compiles utils.c, it complains as below:

utils.c:852:45: warning: '%s' directive output may be truncated writing
up to 4095 bytes into a region of size 4084 [-Wformat-truncation=]
   snprintf(path, sizeof(path), "/dev/mapper/%s", name);
 ^~   
In file included from /usr/include/stdio.h:873,
 from utils.c:20:
/usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk'
output between 13 and 4108 bytes into a destination of size 4096
   return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
  ^~~~
__bos (__s), __fmt, __va_arg_pack ());
~

This isn't a type of warning we care about, particularly when PATH_MAX
is much less than either.

Using the GCC option -Wno-format-truncation to disable this for default
build.

Signed-off-by: Su Yanjun 
---
 Makefile.extrawarn | 1 +
 configure.ac   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile.extrawarn b/Makefile.extrawarn
index 1f4bda94a167..ed76fb5b5554 100644
--- a/Makefile.extrawarn
+++ b/Makefile.extrawarn
@@ -47,6 +47,7 @@ warning-1 += -Wold-style-definition
 warning-1 += $(call cc-option, -Wmissing-include-dirs)
 warning-1 += $(call cc-option, -Wunused-but-set-variable)
 warning-1 += $(call cc-disable-warning, missing-field-initializers)
+warning-1 += $(call cc-option, -Wformat-truncation)
 
 warning-2 := -Waggregate-return
 warning-2 += -Wcast-align
diff --git a/configure.ac b/configure.ac
index df02f20655d9..c626beca8b77 100644
--- a/configure.ac
+++ b/configure.ac
@@ -12,7 +12,7 @@ LIBBTRFS_MAJOR=0
 LIBBTRFS_MINOR=1
 LIBBTRFS_PATCHLEVEL=2
 
-CFLAGS=${CFLAGS:-"-g -O1 -Wall -D_FORTIFY_SOURCE=2"}
+CFLAGS=${CFLAGS:-"-g -O1 -Wall -D_FORTIFY_SOURCE=2 -Wno-format-truncation"}
 AC_SUBST([CFLAGS])
 
 AC_PREREQ([2.60])
-- 
2.19.1





Re: Kernel crash related to LZO compression

2018-10-25 Thread Qu Wenruo


On 2018/10/25 下午11:56, Dmitry Katsubo wrote:
> Dear btrfs community,
> 
> My excuses for the dumps for rather old kernel (4.9.25), nevertheless I
> wonder
> about your opinion about the below reported kernel crashes.
> 
> As I could understand the situation (correct me if I am wrong), it happened
> that some data block became corrupted which resulted the following
> kernel trace
> during the boot:
> 
> kernel BUG at /build/linux-fB36Cv/linux-4.9.25/fs/btrfs/extent_io.c:2318!
> invalid opcode:  [#1] SMP
> Call Trace:
>  [] ? end_bio_extent_readpage+0x4e9/0x680 [btrfs]
>  [] ? end_compressed_bio_read+0x3b/0x2d0 [btrfs]
>  [] ? btrfs_scrubparity_helper+0xce/0x2d0 [btrfs]
>  [] ? process_one_work+0x141/0x380
>  [] ? worker_thread+0x41/0x460
>  [] ? kthread+0xb4/0xd0
>  [] ? process_one_work+0x380/0x380
>  [] ? kthread_park+0x50/0x50
>  [] ? ret_from_fork+0x1b/0x28
> 
> The problematic file turned out to be the one used by systemd-journald
> /var/log/journal/c496cea41ebc4700a0dfaabf64a21be4/system.journal
> which was trying to read it (or append to it) during the boot and that was
> causing the system crash (see attached bootN_dmesg.txt).
> 
> I've rebooted in safe mode and tried to copy the data from this
> partition to
> another location using btrfs-restore, however kernel was crashing as
> well with
> a bit different symphom (see attached copyN_dmesg.txt):
> 
> Call Trace:
>  [] ? lzo_decompress_biovec+0x1b0/0x2b0 [btrfs]
>  [] ? vmalloc+0x38/0x40
>  [] ? end_compressed_bio_read+0x265/0x2d0 [btrfs]
>  [] ? btrfs_scrubparity_helper+0xce/0x2d0 [btrfs]
>  [] ? process_one_work+0x141/0x380
>  [] ? worker_thread+0x41/0x460
>  [] ? kthread+0xb4/0xd0
>  [] ? ret_from_fork+0x1b/0x28
> 
> Just to keep away from the problem, I've removed this file and also removed
> "compress=lzo" mount option.
> 
> Are there any updates / fixes done in that area? Is lzo option safe to use?

Yes, we have commits to harden lzo decompress code in v4.18:

de885e3ee281a88f52283c7e8994e762e3a5f6bd btrfs: lzo: Harden inline lzo
compressed extent decompression
314bfa473b6b6d3efe68011899bd718b349f29d7 btrfs: lzo: Add header length
check to avoid potential out-of-bounds acc

And for the root cause, it's compressed data without csum, then scrub
could make it corrupted.

It's also fixed in v4.18:

665d4953cde6d9e75c62a07ec8f4f8fd7d396ade btrfs: scrub: Don't use inode
page cache in scrub_handle_errored_block()
ac0b4145d662a3b9e34085dea460fb06ede9b69b btrfs: scrub: Don't use inode
pages for device replace

Thanks,
Qu

> 
> P.S. Perhaps relative issue is in "Warnings" section:
> 
> https://wiki.debian.org/Btrfs#Warnings /
> https://www.spinics.net/lists/linux-btrfs/msg56563.html
> 



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 2/2] btrfs-progs: Deprecate unused super block member log_root_transid

2018-10-25 Thread Nikolay Borisov



On 25.10.2018 14:18, David Sterba wrote:
> On Wed, Oct 24, 2018 at 03:48:07PM +0100, Nikolay Borisov wrote:
>>
>>
>> On 24.10.18 г. 15:30 ч., David Sterba wrote:
>>> On Fri, Oct 12, 2018 at 05:57:29PM +0800, Qu Wenruo wrote:
 +  printf("log_root_transid (deprecated)\t%llu\n",
 + le64_to_cpu(sb->__unused_log_root_transid));
>>>
>>> This should be entirely removed.
>>
>> It looks OK to me.
>> Just like the old leafsize.
>>
>> And if we try to use this member again, even old progs could show it
>> without problem.
>
> But what is the point of having something which always shows 0 and is
> essentially unused?

 Personally speaking, even it's in fact never used, it is still part of
 the specification of btrfs super blocks.

 Even it's a bad decision we made a long time ago, we still need to mark
 it as unused, other than converting it back to "reserved".

 To make it short and clear, if some member is specified in early
 specification, even it's never used, we still need to mark it unused and
 keep part of naming.
>>>
>>> Agreed, follows the same patern as the leafsize. The superblock dump is
>>> a debugging and bug report tool, the full output is desirable.
>>
>> I really don't want to get into the argument but I think we should
>> really name unused fields unusedX where X is some number. Looking at the
>> code in struct btrfs_root_backup we have 2 unused fields. In the
>> superblock we have unused space which is called 'reserved'. In struct
>> btrfs_disk_balance_args there is a field called unused, same thing for
>> struct btrfs_balance_item.
>>
>> In btrfs_inode_item we have 'reserved' field, same thing for
>> btrfs_root_item.
>>
>> So the code base is yet again highly inconsistent and looking at it
>> there is only a single field following the __unused_original_name, so
>> it's really the exception rather than the rule.
> 
> You're right about the inconsistent namig, for the "reserved for future
> use" members. This could be unified to "reserved", and even though it's
> in a public header I would not expect any application using the items.
> 
> The __unused_original_name is a different kind of "unused", as it became
> obsolete at some point and the bytes cannot be reused, unlike the
> reserved ones.
> 
> It's an exception because it was the first instance, there was no
> established rule/pattern to follow just a deliberate choice in the
> naming so it captures the original purpose and current status.
> 
>> Again, I would argue that once a field is deprecated or in this case it
>> has _never_ actually been used then we might as well rename it to
>> something cryptic so as not to attach (false) meaning to it. Just my 50
>> cents
> 
> The log_root_transid is a again different from leafsize. I don't know
> what exactly you mean by the cryptic naming here, I'd rather keep the
> name selfexplanatory. Using 'reserved' could be misleading and somebody
> would want to use it for futher extensions.

But this is exactly my point - log_root_transid can indeed be repurposed
because it has _never_ been used in the code.



Re: Kernel crash related to LZO compression

2018-10-25 Thread Chris Murphy
On Thu, Oct 25, 2018 at 9:56 AM, Dmitry Katsubo  wrote:
> Dear btrfs community,
>
> My excuses for the dumps for rather old kernel (4.9.25), nevertheless I
> wonder
> about your opinion about the below reported kernel crashes.
>
> As I could understand the situation (correct me if I am wrong), it happened
> that some data block became corrupted which resulted the following kernel
> trace
> during the boot:
>
> kernel BUG at /build/linux-fB36Cv/linux-4.9.25/fs/btrfs/extent_io.c:2318!
> invalid opcode:  [#1] SMP
> Call Trace:
>  [] ? end_bio_extent_readpage+0x4e9/0x680 [btrfs]
>  [] ? end_compressed_bio_read+0x3b/0x2d0 [btrfs]
>  [] ? btrfs_scrubparity_helper+0xce/0x2d0 [btrfs]
>  [] ? process_one_work+0x141/0x380
>  [] ? worker_thread+0x41/0x460
>  [] ? kthread+0xb4/0xd0
>  [] ? process_one_work+0x380/0x380
>  [] ? kthread_park+0x50/0x50
>  [] ? ret_from_fork+0x1b/0x28
>
> The problematic file turned out to be the one used by systemd-journald
> /var/log/journal/c496cea41ebc4700a0dfaabf64a21be4/system.journal
> which was trying to read it (or append to it) during the boot and that was
> causing the system crash (see attached bootN_dmesg.txt).
>
> I've rebooted in safe mode and tried to copy the data from this partition to
> another location using btrfs-restore, however kernel was crashing as well
> with
> a bit different symphom (see attached copyN_dmesg.txt):
>
> Call Trace:
>  [] ? lzo_decompress_biovec+0x1b0/0x2b0 [btrfs]
>  [] ? vmalloc+0x38/0x40
>  [] ? end_compressed_bio_read+0x265/0x2d0 [btrfs]
>  [] ? btrfs_scrubparity_helper+0xce/0x2d0 [btrfs]
>  [] ? process_one_work+0x141/0x380
>  [] ? worker_thread+0x41/0x460
>  [] ? kthread+0xb4/0xd0
>  [] ? ret_from_fork+0x1b/0x28
>
> Just to keep away from the problem, I've removed this file and also removed
> "compress=lzo" mount option.
>
> Are there any updates / fixes done in that area? Is lzo option safe to use?


It should be safe even with that kernel. I'm not sure this is
compression related. There is a corruption bug related to inline
extents and corruption that had been fairly elusive but I think it's
fixed now. I haven't run into it though.

I would say the first step no matter what if you're using an older
kernel, is to boot a current Fedora or Arch live or install media,
mount the Btrfs and try to read the problem files and see if the
problem still happens. I can't even being to estimate the tens of
thousands of line changes since kernel 4.9.

What profile are you using for this Btrfs? Is this a raid56? What do
you get for 'btrfs fi us ' ?



-- 
Chris Murphy


Re: Failover for unattached USB device

2018-10-25 Thread Chris Murphy
On Thu, Oct 25, 2018 at 3:47 AM, Dmitry Katsubo  wrote:
>
>
> BTRFS error (device sdf): bdev /dev/sdh errs: wr 0, rd 1867, flush 0,
> corrupt 0, gen 0
> BTRFS error (device sdf): bdev /dev/sdg errs: wr 0, rd 1867, flush 0,
> corrupt 0, gen 0
>
> Attempts lasted for 29 minutes.

Yep, and it floods the log. It's extra fun if the journal is on the
device with errors. The more errors, the more writes and reads to the
problem drive, the more errors, the more writes, the more errors...
snowball.

But that's the state of error handling on Btrfs, which is still more
sophisticated than other file systems. It's not more sophisticated
than the kernel's md driver, which does have some sort of read error
rate limit and then it'll kick the drive out of the array (faulty
state) and stop complaining about it. And I think it considers a drive
faulty on a single write failure.

>
> Thanks for this information. I have a situation similar to yours, with
> only important difference that my drives are put into the USB dock with
> independent power and cooling like this one:
>
> https://www.ebay.com/itm/Mediasonic-ProBox-4-Bay-3-5-Hard-Drive-Enclosure-USB-3-0-eSATA-Sata-3-6-0Gbps/273161164246
>
> so I don't think I need to worry about amps. This dock is connected
> directly to USB port on the motherboard.

It is entirely plausible that this still needs a hub, but it really
depends on the exact errors you're getting. And those need to go to
the linux-usb list, I don't know enough about it.

And it might require a bit of luck to get a reply because it's a very
busy list. My main recommendation is to be very concise: They will
want to know the hardware setup (topology), lsusb -v, lspci, and a
complete dmesg. It'll seem reasonable to snip just to the usb error
messages, that almost always drives developers crazy because important
hints for problems can show up in kernel message during boot, so they
will inevitably want the whole dmesg. Ideal scenario is to do a clean
boot and then reproduce the problem, and then capture the dmesg, that
way it's a concise dmesg that isn't two weeks old with a bunch of
device connects and disconnects or whatever. There almost certainly
will be usb kernel parameters for debugging, ideally you search the
linux-usb list archives to find out what they are (I'm not sure) so
that you already have that set for your clean boot.

There might be usb quirks for your hardware setup that apply. Or they
might suggests that it still needs a  USB hub to clean things up
between controller and bridge chipset.



>
> However indeed there could be bugs both on dock side and in south bridge.
> More over I could imagine that USB reset happens due to another USB device,
> like a wave stated in one place turning into tsunami for the whole
> USB subsystem.

If there is a hub, one of their jobs is to prevent that from
happening. And if the drive enclosure and problem device are on
separate ports, they are effectively going through a built-in hub in
the usb host device. But yeah, you want to tell linux-usb exactly what
devices (and chipsets which lsusb -v will show)  you're using because
they may already know about such problems.


>
>> There are pending patches for something similar that you can find in
>> the archives. I think the reason they haven't been merged yet is there
>> haven't been enough comments and feedback (?). I think Anand Jain is
>> the author of those patches so you might dig around in the archives.
>> In a way you have an ideal setup for testing them out. Just make sure
>> you have backups...
>
>
> Thanks for reference. Should I look for this patch here:
>
> https://patchwork.kernel.org/project/linux-btrfs/list/?submitter=34632=-date

Maybe, it's a lot of patches to go through. I'm using
https://lore.kernel.org/linux-btrfs which has a search field.

This is the recent email I was thinking of that might point you in the
right direction:

https://lore.kernel.org/linux-btrfs/2287c62d-6dbb-3b30-1134-d754e4294...@oracle.com/

A complicating factor is that the block layer does do some retries.
I'm also not familiar enough with the way md does retries and sets
drives as faulty and if that is really what Btrfs should replicate or
not. Some of these conversations require cooperation with other kernel
developers, I suspect, like libata, SCSI, USB, SD, in order to make
sure no one is being stepped on with some big surprise.



>
> I didn't observe any errors while doing "btrfs check" on this volume after
> several such resets, because that volume is mostly used for reading and
> chance that USB reset happens during the write is very low.

If it mounts and the most recent changes are readable without errors,
the file system is probably fine. Btrfs is pretty good at detecting
and correcting for hardware related problems, in that it is fussier
than other file systems because it can detect such problems in both
metadata and data and should be able to avoid them in the first place
due to always on COW (as long as you 

Re: [PATCH 0/3] fix pinned underflow in generic/475

2018-10-25 Thread Josef Bacik
On Wed, Oct 24, 2018 at 08:24:00PM +0800, Lu Fengqi wrote:
> When running generic/475, pinned underflow may occur. This patch will
> fix this problem, but there are still other warnings need to addressed in
> this case.
> 
> Patch 1-2 introduce a macro and wrappers to help detect underflow
> Patch 3   the fix patch of pinned underflow
> 
> Lu Fengqi (2):
>   btrfs: extent-tree: Detect bytes_pinned underflow earlier
>   btrfs: fix pinned underflow after transaction aborted
> 
> Qu Wenruo (1):
>   btrfs: extent-tree: Detect bytes_may_use underflow earlier
> 

You can add

Reviewed-by: Josef Bacik 

to the series, thanks,

Josef


Re: [PATCH 0/3] btrfs-progs: Minor qgroup subcommand usage update

2018-10-25 Thread David Sterba
On Mon, Aug 06, 2018 at 02:00:05PM +0800, Qu Wenruo wrote:
> It turns out even commit e5a6610c943b ("btrfs-progs: qgroup assign: add
> option to schedule rescan") introduced the ability to auto rescan, it's
> less known option and even some developer find it hard to understand.
> (Well, the whole quota thing is already a little hard to understand)
> 
> Thus it makes more sense to make --rescan as default behavior, and add
> more explanation on why quota rescan is needed, and under which case we
> can skip the rescan.
> 
> Qu Wenruo (3):
>   btrfs-progs: cmds-qgroup: Use bool to replace int for @rescan
>   btrfs-progs: qgroup: make --rescan as the default behavior for assign
>   btrfs-progs: Doc: Update btrfs-qgroup for the rescan condition

Applied, thanks. I reworded some parts, so don't be surprised that there
are changes.


Re: [PATCH 0/2] btrfstune fs_info cleanups

2018-10-25 Thread David Sterba
On Mon, Aug 20, 2018 at 04:28:03PM +0300, Nikolay Borisov wrote:
> While inspecting btrfstune source code I came across 2 functions which 
> needlessly
> take fs_info argument. This small series rectifies this. No functional 
> changes. 
> 
> Nikolay Borisov (2):
>   btrfstune: Remove fs_info arg from change_device_uuid
>   btrfstune: Rename change_header_uuid to change_buffer_header_uuid

Applied, thanks.


Re: [PATCH v2 0/6] btrfs-progs: print-tree: Minor enhancement along with --bfs/--dfs options

2018-10-25 Thread David Sterba
On Fri, Sep 28, 2018 at 10:04:40AM +0800, Qu Wenruo wrote:
> This patchset can be fetched from github:
> https://github.com/adam900710/btrfs-progs/tree/dump_tree_enhance
> 
> The main point of this patchset is to allow "btrfs ins dump-tree" to
> print tree blocks in breadth-first order when level is higher than 2.
> 
> The 1st patch is just a minor cleanup, to remove some unused and
> meaningless output.
> 
> The 2nd patch does a root<->fs_info cleanup, provides the basis for
> later btrfs_next_sibling_tree_block().
> 
> The 3rd patch implements a new function, btrfs_next_sibling_tree_block()
> to find next sibling tree block, other than leaf.
> 
> The 4th patch implements BFS as bfs_print_children().
> 
> The BFS search itself is implemented using path along with
>  path::lowest_level and btrfs_next_sibling_tree_block() to iterate all
> sibling tree blocks in a level.
> 
> The 5th patch adds --bfs/--dfs options for dump-tree.
> 
> The final patch changes @follow type from int to bool.
> 
> Changelog:
> v2:
>   Make bfs/dfs selectable by adding --bfs and --dfs options.
>   Still keep dfs as default (although I really don't see any
>   compatibility problem)

Let's do it in two steps: first introduce the bfs/dfs modes and switch
the default eventually if it proves more useful over time.

I'm going to apply the patches, with Nikolay's reviewed-by for patches
1-5 as the code is equivalent to what he acked in v1.


Kernel crash related to LZO compression

2018-10-25 Thread Dmitry Katsubo

Dear btrfs community,

My excuses for the dumps for rather old kernel (4.9.25), nevertheless I 
wonder

about your opinion about the below reported kernel crashes.

As I could understand the situation (correct me if I am wrong), it 
happened
that some data block became corrupted which resulted the following 
kernel trace

during the boot:

kernel BUG at 
/build/linux-fB36Cv/linux-4.9.25/fs/btrfs/extent_io.c:2318!

invalid opcode:  [#1] SMP
Call Trace:
 [] ? end_bio_extent_readpage+0x4e9/0x680 [btrfs]
 [] ? end_compressed_bio_read+0x3b/0x2d0 [btrfs]
 [] ? btrfs_scrubparity_helper+0xce/0x2d0 [btrfs]
 [] ? process_one_work+0x141/0x380
 [] ? worker_thread+0x41/0x460
 [] ? kthread+0xb4/0xd0
 [] ? process_one_work+0x380/0x380
 [] ? kthread_park+0x50/0x50
 [] ? ret_from_fork+0x1b/0x28

The problematic file turned out to be the one used by systemd-journald
/var/log/journal/c496cea41ebc4700a0dfaabf64a21be4/system.journal
which was trying to read it (or append to it) during the boot and that 
was

causing the system crash (see attached bootN_dmesg.txt).

I've rebooted in safe mode and tried to copy the data from this 
partition to
another location using btrfs-restore, however kernel was crashing as 
well with

a bit different symphom (see attached copyN_dmesg.txt):

Call Trace:
 [] ? lzo_decompress_biovec+0x1b0/0x2b0 [btrfs]
 [] ? vmalloc+0x38/0x40
 [] ? end_compressed_bio_read+0x265/0x2d0 [btrfs]
 [] ? btrfs_scrubparity_helper+0xce/0x2d0 [btrfs]
 [] ? process_one_work+0x141/0x380
 [] ? worker_thread+0x41/0x460
 [] ? kthread+0xb4/0xd0
 [] ? ret_from_fork+0x1b/0x28

Just to keep away from the problem, I've removed this file and also 
removed

"compress=lzo" mount option.

Are there any updates / fixes done in that area? Is lzo option safe to 
use?


P.S. Perhaps relative issue is in "Warnings" section:

https://wiki.debian.org/Btrfs#Warnings / 
https://www.spinics.net/lists/linux-btrfs/msg56563.html


--
With best regards,
Dmitry[   13.100666] BTRFS critical (device sda3): stripe index math went horribly 
wrong, got stripe_index=4294936575, num_stripes=2
[   13.100901] BTRFS critical (device sda3): stripe index math went horribly 
wrong, got stripe_index=4294936575, num_stripes=2
[   13.101096] BTRFS critical (device sda3): stripe index math went horribly 
wrong, got stripe_index=4294936575, num_stripes=2
[   13.101178] [ cut here ]
[   13.101182] kernel BUG at 
/build/linux-fB36Cv/linux-4.9.25/fs/btrfs/extent_io.c:2318!
[   13.101185] invalid opcode:  [#1] SMP
[   13.101257] Modules linked in: binfmt_misc bridge stp llc iTCO_wdt 
iTCO_vendor_support arc4 ppdev coretemp ath5k pcspkr ath sr9700 mac80211 dm9601 
serio_raw usbnet cfg80211 snd_hda_codec_realtek snd_hda_codec_generic mii 
rfkill lpc_ich snd_hda_intel i915 mfd_core snd_hda_codec evdev sg snd_hda_core 
snd_hwdep snd_pcm_oss snd_mixer_oss rng_core snd_pcm snd_timer video snd 
drm_kms_helper soundcore drm parport_pc parport i2c_algo_bit shpchp button 
acpi_cpufreq netconsole configfs w83627hf hwmon_vid ip_tables x_tables autofs4 
xfs libcrc32c btrfs crc32c_generic xor raid6_pq ses enclosure 
scsi_transport_sas uas hid_generic usbhid usb_storage hid sd_mod sr_mod cdrom 
i2c_i801 i2c_smbus firewire_ohci ata_generic firewire_core crc_itu_t ehci_pci 
ata_piix libata uhci_hcd ehci_hcd scsi_mod e1000e ptp pps_core
[   13.101261]  usbcore usb_common
[   13.101267] CPU: 0 PID: 96 Comm: kworker/u4:2 Tainted: GW   
4.9.0-3-686-pae #1 Debian 4.9.25-1
[   13.101269] Hardware name: AOpen i945GMx-IF/i945GMx-IF, BIOS i945GMx-IF 
R1.01 Mar.02.2007 AOpen Inc. 03/02/2007
[   13.101326] Workqueue: btrfs-endio btrfs_endio_helper [btrfs]
[   13.101328] task: f6d409c0 task.stack: f6d46000
[   13.101332] EIP: 0060:[] EFLAGS: 00010203 CPU: 0
[   13.101373] EIP is at btrfs_check_repairable+0x12c/0x130 [btrfs]
[   13.101375] EAX: 8800 EBX: f292dd80 ECX: 8801 EDX: 0002
[   13.101378] ESI: f69c EDI: f678bc5c EBP: f6d47e50 ESP: f6d47e30
[   13.101381]  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[   13.101383] CR0: 80050033 CR2: b64c6db0 CR3: 36c115a0 CR4: 06f0
[   13.101386] Stack:
[   13.101395]  1000   f292dd80 d04e93d0  f35885d8 
f35885d8
[   13.101402]  f6d47ed8 f8c63739 0001  f6d47ec4 f8c951eb f3bb4800 
0001
[   13.101412]  0009 f678bc00 f35884b0   0001  

[   13.101413] Call Trace:
[   13.101457]  [] ? end_bio_extent_readpage+0x4e9/0x680 [btrfs]
[   13.101497]  [] ? end_compressed_bio_read+0x3b/0x2d0 [btrfs]
[   13.101538]  [] ? btrfs_scrubparity_helper+0xce/0x2d0 [btrfs]
[   13.101548]  [] ? process_one_work+0x141/0x380
[   13.101553]  [] ? worker_thread+0x41/0x460
[   13.101557]  [] ? kthread+0xb4/0xd0
[   13.101561]  [] ? process_one_work+0x380/0x380
[   13.101566]  [] ? kthread_park+0x50/0x50
[   13.101572]  [] ? ret_from_fork+0x1b/0x28
[   13.104547] Modules linked in: binfmt_misc bridge stp llc iTCO_wdt 
iTCO_vendor_support 

Re: [PATCH] btrfs-progs: image: Warn about log tree generation mismatch when restore

2018-10-25 Thread David Sterba
On Thu, Oct 04, 2018 at 02:02:13PM +0800, Qu Wenruo wrote:
> At restore time, btrfs-image will try to fixup dev extents, this will
> trigger a transaction.
> 
> It's normally OK to start a new transaction, but for image with log
> tree, a new transaction will bump up generation, and log tree generation
> will not match with super block generation + 1.
> 
> There is no good way to solve it yet. Just output a warning for now.
> 
> Signed-off-by: Qu Wenruo 

Applied, thanks.


Re: [PATCH 2/2] btrfs-progs: Deprecate unused super block member log_root_transid

2018-10-25 Thread David Sterba
On Wed, Oct 24, 2018 at 03:48:07PM +0100, Nikolay Borisov wrote:
> 
> 
> On 24.10.18 г. 15:30 ч., David Sterba wrote:
> > On Fri, Oct 12, 2018 at 05:57:29PM +0800, Qu Wenruo wrote:
> >> +  printf("log_root_transid (deprecated)\t%llu\n",
> >> + le64_to_cpu(sb->__unused_log_root_transid));
> >
> > This should be entirely removed.
> 
>  It looks OK to me.
>  Just like the old leafsize.
> 
>  And if we try to use this member again, even old progs could show it
>  without problem.
> >>>
> >>> But what is the point of having something which always shows 0 and is
> >>> essentially unused?
> >>
> >> Personally speaking, even it's in fact never used, it is still part of
> >> the specification of btrfs super blocks.
> >>
> >> Even it's a bad decision we made a long time ago, we still need to mark
> >> it as unused, other than converting it back to "reserved".
> >>
> >> To make it short and clear, if some member is specified in early
> >> specification, even it's never used, we still need to mark it unused and
> >> keep part of naming.
> > 
> > Agreed, follows the same patern as the leafsize. The superblock dump is
> > a debugging and bug report tool, the full output is desirable.
> 
> I really don't want to get into the argument but I think we should
> really name unused fields unusedX where X is some number. Looking at the
> code in struct btrfs_root_backup we have 2 unused fields. In the
> superblock we have unused space which is called 'reserved'. In struct
> btrfs_disk_balance_args there is a field called unused, same thing for
> struct btrfs_balance_item.
> 
> In btrfs_inode_item we have 'reserved' field, same thing for
> btrfs_root_item.
> 
> So the code base is yet again highly inconsistent and looking at it
> there is only a single field following the __unused_original_name, so
> it's really the exception rather than the rule.

You're right about the inconsistent namig, for the "reserved for future
use" members. This could be unified to "reserved", and even though it's
in a public header I would not expect any application using the items.

The __unused_original_name is a different kind of "unused", as it became
obsolete at some point and the bytes cannot be reused, unlike the
reserved ones.

It's an exception because it was the first instance, there was no
established rule/pattern to follow just a deliberate choice in the
naming so it captures the original purpose and current status.

> Again, I would argue that once a field is deprecated or in this case it
> has _never_ actually been used then we might as well rename it to
> something cryptic so as not to attach (false) meaning to it. Just my 50
> cents

The log_root_transid is a again different from leafsize. I don't know
what exactly you mean by the cryptic naming here, I'd rather keep the
name selfexplanatory. Using 'reserved' could be misleading and somebody
would want to use it for futher extensions.


Re: Failover for unattached USB device

2018-10-25 Thread Dmitry Katsubo

On 2018-10-24 20:05, Chris Murphy wrote:

I think about the best we can expect in the short term is that Btrfs
goes read-only before the file system becomes corrupted in a way it
can't recover with a normal mount. And I'm not certain it is in this
state of development right now for all cases. And I say the same thing
for other file systems as well.

Running Btrfs on USB devices is fine, so long as they're well behaved.
I have such a setup with USB 3.0 devices. Perhaps I got a bit lucky,
because there are a lot of known bugs with USB controllers, USB bridge
chipsets, and USB hubs.

Having user definable switches for when to go read-only is, I think
misleading to the user, and very likely will mislead the file system.
The file system needs to go read-only when it gets confused, period.
It doesn't matter what the error rate is.


In general I agree. I just wonder why it couldn't happen quicker. For
example, from the log I've originally attached one can see that btrfs
made 1867 attempts to read (perhaps the same) block from both devices
in RAID1 volume, without success:

BTRFS error (device sdf): bdev /dev/sdh errs: wr 0, rd 1867, flush 0, 
corrupt 0, gen 0
BTRFS error (device sdf): bdev /dev/sdg errs: wr 0, rd 1867, flush 0, 
corrupt 0, gen 0


Attempts lasted for 29 minutes.


The work around is really to do the hard work making the devices
stable. Not asking Btrfs to paper over known unstable hardware.

In my case, I started out with rare disconnects and resets with
directly attached drives. This was a couple years ago. It was a Btrfs
raid1 setup, and the drives would not go missing at the same time, but
both would just drop off from time to time. Btrfs would complain of
dropped writes, I vaguely remember it going read only. But normal
mounts worked, sometimes with scary errors but always finding a good
copy on the other drive, and doing passive fixups. Scrub would always
fix up the rest. I'm still using those same file systems on those
devices, but now they go through a dyconn USB 3.0 hub with a decently
good power supply. I originally thought the drop offs were power
related, so I explicitly looked for a USB hub that could supply at
least 2A, and this one is 12VDC @ 2500mA. A laptop drive will draw
nearly 1A on spin up, but at that point P=AV. Laptop drives during
read/write using 1.5 W to 2.5 W @ 5VDC.

1.5-2.5 W = A * 5 V
Therefore A = 0.3-0.5A

And for 4 drives at possibly 0.5 A (although my drives are all at the
1.6 W read/write), that's 2 A @ 5 V, which is easily maintained for
the hub power supply (which by my calculation could do 6 A @ 5 V, not
accounting for any resistance).

Anyway, as it turns out I don't think it was power related, as the
Intel NUC in question probably had just enough amps per port. And what
it really was, was incompatibility between the Intel controller and
the bridgechipset in the USB-SATA cases, and the USB hub is similar to
an ethernet hub, it actually reads the USB stream and rewrites it out.
So hubs are actually pretty complicated little things, and having a
good one matters.


Thanks for this information. I have a situation similar to yours, with
only important difference that my drives are put into the USB dock with
independent power and cooling like this one:

https://www.ebay.com/itm/Mediasonic-ProBox-4-Bay-3-5-Hard-Drive-Enclosure-USB-3-0-eSATA-Sata-3-6-0Gbps/273161164246

so I don't think I need to worry about amps. This dock is connected
directly to USB port on the motherboard.

However indeed there could be bugs both on dock side and in south 
bridge.
More over I could imagine that USB reset happens due to another USB 
device,

like a wave stated in one place turning into tsunami for the whole
USB subsystem.


There are pending patches for something similar that you can find in
the archives. I think the reason they haven't been merged yet is there
haven't been enough comments and feedback (?). I think Anand Jain is
the author of those patches so you might dig around in the archives.
In a way you have an ideal setup for testing them out. Just make sure
you have backups...


Thanks for reference. Should I look for this patch here:

https://patchwork.kernel.org/project/linux-btrfs/list/?submitter=34632=-date

or this patch was only floating around in this maillist?


'btrfs check' without the --repair flag is safe and read only but
takes a long time because it'll read all metadata. The fastest safe
way is to mount it ro and read a directory recently being written to
and see if there are any kernel errors. You could recursively copy
files from a directory to /dev/null and then check kernel messages for
any errors. So long as metadata is DUP, there is a good chance a bad
copy of metadata can be automatically fixed up with a good copy. If
there's only single copy of metadata, or both copies get corrupt, then
it's difficult. Usually recovery of data is possible, but depending on
what's damaged, repair might not be possible.


I think "btrfs check" would be too heavy. 

[PATCH 3/4] btrfs-progs: original check: Add ability to repair dir item with invalid hash

2018-10-25 Thread Qu Wenruo
The repair function is reusing delete_corrupted_dir_item().

Since the error can happen for root dir inode, also call
try_repair_inode() on root dir inode.

This is especially important for old btrfs, since later kernel
introduces restrict tree-checker, which could detect such hash mismatch
and refuse to read the corrupted leaf.

With this repair ability, user could repair with btrfs check --repair.

Link: https://bugzilla.opensuse.org/show_bug.cgi?id=991
Signed-off-by: Qu Wenruo 
---
 check/main.c | 45 -
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/check/main.c b/check/main.c
index 4a4f2a7c9cdb..1abf6c994710 100644
--- a/check/main.c
+++ b/check/main.c
@@ -2658,6 +2658,41 @@ out:
return ret;
 }
 
+static int repair_mismatch_dir_hash(struct btrfs_trans_handle *trans,
+   struct btrfs_root *root,
+   struct inode_record *rec)
+{
+   struct mismatch_dir_hash_record *hash;
+   int ret;
+
+   printf(
+   "Deleting bad dir items with invalid hash for root %llu ino %llu\n",
+   root->root_key.objectid, rec->ino);
+   while (!list_empty(>mismatch_dir_hash)) {
+   char *namebuf;
+
+   hash = list_entry(rec->mismatch_dir_hash.next,
+   struct mismatch_dir_hash_record, list);
+   namebuf = (char *)(hash + 1);
+
+   ret = delete_corrupted_dir_item(trans, root, >key,
+   namebuf, hash->namelen);
+   if (ret < 0)
+   break;
+
+   /* Also reduce dir isize */
+   rec->found_size -= hash->namelen;
+   list_del(>list);
+   free(hash);
+   }
+   if (!ret) {
+   rec->errors &= ~I_ERR_MISMATCH_DIR_HASH;
+   /* We rely on later dir isize repair to reset dir isize */
+   rec->errors |= I_ERR_DIR_ISIZE_WRONG;
+   }
+   return ret;
+}
+
 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
 {
struct btrfs_trans_handle *trans;
@@ -2671,7 +2706,8 @@ static int try_repair_inode(struct btrfs_root *root, 
struct inode_record *rec)
 I_ERR_FILE_EXTENT_ORPHAN |
 I_ERR_FILE_EXTENT_DISCOUNT |
 I_ERR_FILE_NBYTES_WRONG |
-I_ERR_INLINE_RAM_BYTES_WRONG)))
+I_ERR_INLINE_RAM_BYTES_WRONG |
+I_ERR_MISMATCH_DIR_HASH)))
return rec->errors;
 
/*
@@ -2686,6 +2722,8 @@ static int try_repair_inode(struct btrfs_root *root, 
struct inode_record *rec)
return PTR_ERR(trans);
 
btrfs_init_path();
+   if (!ret && rec->errors & I_ERR_MISMATCH_DIR_HASH)
+   ret = repair_mismatch_dir_hash(trans, root, rec);
if (rec->errors & I_ERR_NO_INODE_ITEM)
ret = repair_inode_no_item(trans, root, , rec);
if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
@@ -2780,6 +2818,11 @@ static int check_inode_recs(struct btrfs_root *root,
rec = get_inode_rec(inode_cache, root_dirid, 0);
BUG_ON(IS_ERR(rec));
if (rec) {
+   if (repair) {
+   ret = try_repair_inode(root, rec);
+   if (ret < 0)
+   error++;
+   }
ret = check_root_dir(rec);
if (ret) {
fprintf(stderr, "root %llu root dir %llu error\n",
-- 
2.19.1



[PATCH 4/4] btrfs-progs: fsck-tests: Make 026-bad-dir-item-name test case to verify if btrfs-check can also repair it

2018-10-25 Thread Qu Wenruo
Just remove the customized 'test.sh', then generic fsck test will do the
check-repair-check.

Signed-off-by: Qu Wenruo 
---
 .../026-bad-dir-item-name/description.txt | 41 +++
 .../fsck-tests/026-bad-dir-item-name/test.sh  | 13 --
 2 files changed, 41 insertions(+), 13 deletions(-)
 create mode 100644 tests/fsck-tests/026-bad-dir-item-name/description.txt
 delete mode 100755 tests/fsck-tests/026-bad-dir-item-name/test.sh

diff --git a/tests/fsck-tests/026-bad-dir-item-name/description.txt 
b/tests/fsck-tests/026-bad-dir-item-name/description.txt
new file mode 100644
index ..2bdb0f8179b0
--- /dev/null
+++ b/tests/fsck-tests/026-bad-dir-item-name/description.txt
@@ -0,0 +1,41 @@
+"default_case.img.xz" contains the fs with the following tree dump of fs tree:
+
+  [snip]
+   item 2 key (256 DIR_ITEM 751495445) itemoff 16019 itemsize 92
+   location key (259 INODE_ITEM 0) type FILE
+   transid 9 data_len 0 name_len 13
+   name: foor.WvG1c1Td
+ ^ Hash doesn't match with key
+   location key (260 INODE_ITEM 0) type FILE
+   transid 12 data_len 0 name_len 19
+   name: user.J3__T_Km3dVsW_
+   item 3 key (256 DIR_INDEX 4) itemoff 15976 itemsize 43
+   location key (259 INODE_ITEM 0) type FILE
+   transid 9 data_len 0 name_len 13
+   name: foor.WvG1c1Td
+   item 4 key (256 DIR_INDEX 5) itemoff 15927 itemsize 49
+   location key (260 INODE_ITEM 0) type FILE
+   transid 12 data_len 0 name_len 19
+   name: user.J3__T_Km3dVsW_
+   item 5 key (259 INODE_ITEM 0) itemoff 15767 itemsize 160
+   generation 9 transid 9 size 0 nbytes 0
+   block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0
+   sequence 1 flags 0x0(none)
+   atime 1499844359.341125147 (2017-07-12 15:25:59)
+   ctime 1499844359.341125147 (2017-07-12 15:25:59)
+   mtime 1499844359.341125147 (2017-07-12 15:25:59)
+   otime 1499844359.341125147 (2017-07-12 15:25:59)
+   item 6 key (259 INODE_REF 256) itemoff 15744 itemsize 23
+   index 4 namelen 13 name: foor.WvG1c1Td
+   item 7 key (260 INODE_ITEM 0) itemoff 15584 itemsize 160
+   generation 12 transid 12 size 0 nbytes 0
+   block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0
+   sequence 1 flags 0x0(none)
+   atime 1499844544.931130070 (2017-07-12 15:29:04)
+   ctime 1499844544.931130070 (2017-07-12 15:29:04)
+   mtime 1499844544.931130070 (2017-07-12 15:29:04)
+   otime 1499844544.931130070 (2017-07-12 15:29:04)
+   item 8 key (260 INODE_REF 256) itemoff 1 itemsize 29
+   index 5 namelen 19 name: user.J3__T_Km3dVsW_
+
+Test case is going to check if btrfs check can detect and repair it.
diff --git a/tests/fsck-tests/026-bad-dir-item-name/test.sh 
b/tests/fsck-tests/026-bad-dir-item-name/test.sh
deleted file mode 100755
index a38bf045ae82..
--- a/tests/fsck-tests/026-bad-dir-item-name/test.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-#
-# confirm whether check detects name and hash mismatch in dir_item
-
-source "$TEST_TOP/common"
-
-check_prereq btrfs
-
-image=$(extract_image "./default_case.img.xz")
-
-run_mustfail "dir_item hash mismatch not found" "$TOP/btrfs" check "$image"
-
-rm -f "$image"
-- 
2.19.1



[PATCH 2/4] btrfs-progs: original check: Use mismatch_dir_hash_record to record bad dir items

2018-10-25 Thread Qu Wenruo
This allow us to report the error better, from old in-place report like:
  ERROR: DIR_ITEM[256 751495445] name foor.WvG1c1TdU namelen 13 filetype 1 
mismatch with its hash, wanted 751495445 have 2870353892
  root 5 root dir 256 error

To new centralized report:
  root 5 root dir 256 error
  root 5 inode 256 errors 4
  Dir items with mismatch hash:
  name: foor.WvG1c1Td namelen: 13 wanted 0xab161fe4 has 0x2ccae915

Also, with mismatch_dir_hash_record structure, it provides the basis for
later original mode repair.

Signed-off-by: Qu Wenruo 
---
 check/main.c  | 76 ---
 check/mode-original.h | 14 
 2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/check/main.c b/check/main.c
index bc2ee22f7943..4a4f2a7c9cdb 100644
--- a/check/main.c
+++ b/check/main.c
@@ -462,6 +462,8 @@ static struct inode_record *clone_inode_rec(struct 
inode_record *orig_rec)
struct inode_backref *tmp;
struct orphan_data_extent *src_orphan;
struct orphan_data_extent *dst_orphan;
+   struct mismatch_dir_hash_record *hash_record;
+   struct mismatch_dir_hash_record *new_record;
struct rb_node *rb;
size_t size;
int ret;
@@ -473,6 +475,7 @@ static struct inode_record *clone_inode_rec(struct 
inode_record *orig_rec)
rec->refs = 1;
INIT_LIST_HEAD(>backrefs);
INIT_LIST_HEAD(>orphan_extents);
+   INIT_LIST_HEAD(>mismatch_dir_hash);
rec->holes = RB_ROOT;
 
list_for_each_entry(orig, _rec->backrefs, list) {
@@ -494,6 +497,16 @@ static struct inode_record *clone_inode_rec(struct 
inode_record *orig_rec)
memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
list_add_tail(_orphan->list, >orphan_extents);
}
+   list_for_each_entry(hash_record, _rec->mismatch_dir_hash, list) {
+   size = sizeof(*hash_record) + hash_record->namelen;
+   new_record = malloc(size);
+   if (!new_record) {
+   ret = -ENOMEM;
+   goto cleanup;
+   }
+   memcpy(_record, hash_record, size);
+   list_add_tail(_record->list, >mismatch_dir_hash);
+   }
ret = copy_file_extent_holes(>holes, _rec->holes);
if (ret < 0)
goto cleanup_rb;
@@ -522,6 +535,13 @@ cleanup:
list_del(>list);
free(orig);
}
+   if (!list_empty(>mismatch_dir_hash)) {
+   list_for_each_entry_safe(hash_record, new_record,
+   >mismatch_dir_hash, list) {
+   list_del(_record->list);
+   free(hash_record);
+   }
+   }
 
free(rec);
 
@@ -621,6 +641,25 @@ static void print_inode_error(struct btrfs_root *root, 
struct inode_record *rec)
round_up(rec->isize,
 root->fs_info->sectorsize));
}
+
+   /* Print dir item with mismatch hash */
+   if (errors & I_ERR_MISMATCH_DIR_HASH) {
+   struct mismatch_dir_hash_record *hash_record;
+
+   fprintf(stderr, "Dir items with mismatch hash:\n");
+   list_for_each_entry(hash_record, >mismatch_dir_hash,
+   list) {
+   char *namebuf = (char *)(hash_record + 1);
+   u32 crc;
+
+   crc = btrfs_name_hash(namebuf, hash_record->namelen);
+   fprintf(stderr,
+   "\tname: %.*s namelen: %u wanted 0x%08x has 0x%08llx\n",
+   hash_record->namelen, namebuf,
+   hash_record->namelen, crc,
+   hash_record->key.offset);
+   }
+   }
 }
 
 static void print_ref_error(int errors)
@@ -682,6 +721,7 @@ static struct inode_record *get_inode_rec(struct cache_tree 
*inode_cache,
rec->refs = 1;
INIT_LIST_HEAD(>backrefs);
INIT_LIST_HEAD(>orphan_extents);
+   INIT_LIST_HEAD(>mismatch_dir_hash);
rec->holes = RB_ROOT;
 
node = malloc(sizeof(*node));
@@ -718,6 +758,8 @@ static void free_orphan_data_extents(struct list_head 
*orphan_extents)
 static void free_inode_rec(struct inode_record *rec)
 {
struct inode_backref *backref;
+   struct mismatch_dir_hash_record *hash;
+   struct mismatch_dir_hash_record *next;
 
if (--rec->refs > 0)
return;
@@ -727,6 +769,8 @@ static void free_inode_rec(struct inode_record *rec)
list_del(>list);
free(backref);
}
+   list_for_each_entry_safe(hash, next, >mismatch_dir_hash, list)
+   free(hash);
free_orphan_data_extents(>orphan_extents);
free_file_extent_holes(>holes);
free(rec);
@@ -1273,6 

[PATCH 0/4] btrfs-progs: check: Add repair support for mismatch dir item hash

2018-10-25 Thread Qu Wenruo
This patchset can be fetched from github:
https://github.com/adam900710/btrfs-progs/tree/repair_bad_dir_item_hash

This is still based on the latest stable v4.17.1 branch (which is already a
little old now)

We have report from suse bugzilla where user report latest kernel refuse
to mount the fs.

The problem turns out to be that some old kernel (around Jan 2017) has a
bug that could leads to corrupted dir item with mismatch hash.

This patchset will allow btrfs check --repair to repair such problem.

Qu Wenruo (4):
  btrfs-progs: lowmem check: Add ability to repair dir item with
mismatch hash
  btrfs-progs: original check: Use mismatch_dir_hash_record to record
bad dir items
  btrfs-progs: original check: Add ability to repair dir item with
invalid hash
  btrfs-progs: fsck-tests: Make 026-bad-dir-item-name test case to
verify if btrfs-check can also repair it

 check/main.c  | 121 +-
 check/mode-common.c   |  51 
 check/mode-common.h   |   5 +-
 check/mode-lowmem.c   |  46 ++-
 check/mode-lowmem.h   |   1 +
 check/mode-original.h |  14 ++
 ctree.h   |   3 +
 dir-item.c|   6 +-
 .../026-bad-dir-item-name/description.txt |  41 ++
 .../fsck-tests/026-bad-dir-item-name/test.sh  |  13 --
 10 files changed, 272 insertions(+), 29 deletions(-)
 create mode 100644 tests/fsck-tests/026-bad-dir-item-name/description.txt
 delete mode 100755 tests/fsck-tests/026-bad-dir-item-name/test.sh

-- 
2.19.1



[PATCH 1/4] btrfs-progs: lowmem check: Add ability to repair dir item with mismatch hash

2018-10-25 Thread Qu Wenruo
For DIR_ITEM with mismatch hash, we could just remove the offending dir
item.

Lowmem mode will handle the rest part (either re-create the correct
dir_item or move the orphan inode to lost+found).

This is especially important for old btrfs, since later kernel
introduces restrict tree-checker, which could detect such hash mismatch
and refuse to read the corrupted leaf.

With this repair ability, user could repair with btrfs check
--mode=lowmem --repair.

Link: https://bugzilla.opensuse.org/show_bug.cgi?id=991
Signed-off-by: Qu Wenruo 
---
 check/mode-common.c | 51 +
 check/mode-common.h |  5 -
 check/mode-lowmem.c | 46 +++-
 check/mode-lowmem.h |  1 +
 ctree.h |  3 +++
 dir-item.c  |  6 +-
 6 files changed, 101 insertions(+), 11 deletions(-)

diff --git a/check/mode-common.c b/check/mode-common.c
index 15e2bbd1f30f..8660f43f6b20 100644
--- a/check/mode-common.c
+++ b/check/mode-common.c
@@ -742,3 +742,54 @@ void cleanup_excluded_extents(struct btrfs_fs_info 
*fs_info)
}
fs_info->excluded_extents = NULL;
 }
+
+/*
+ * Delete one corrupted dir item whose hash doesn't match its name.
+ *
+ * Since its hash is incorrect, we can't use btrfs_name_hash() to calculate
+ * the search key, but relie on @di_key parameter to do the search.
+ */
+int delete_corrupted_dir_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_key *di_key, char *namebuf,
+ u32 namelen)
+{
+   struct btrfs_dir_item *di_item;
+   struct btrfs_path path;
+   int ret;
+
+   btrfs_init_path();
+   ret = btrfs_search_slot(trans, root, di_key, , 0, 1);
+   if (ret > 0) {
+   error("key (%llu %u %llu) doesn't exist in root %llu",
+   di_key->objectid, di_key->type, di_key->offset,
+   root->root_key.objectid);
+   ret = -ENOENT;
+   goto out;
+   }
+   if (ret < 0) {
+   error("failed to search root %llu: %d",
+   root->root_key.objectid, ret);
+   goto out;
+   }
+
+   di_item = btrfs_match_dir_item_name(root, , namebuf, namelen);
+   if (!di_item) {
+   /*
+* This is possible if the dir_item has incorrect namelen.
+* But in that case, we shouldn't reach repair path here.
+*/
+   error("no dir item named '%s' found with key (%llu %u %llu)",
+   namebuf, di_key->objectid, di_key->type,
+   di_key->offset);
+   ret = -ENOENT;
+   goto out;
+   }
+   ret = btrfs_delete_one_dir_name(trans, root, , di_item);
+   if (ret < 0)
+   error("failed to delete one dir name: %d", ret);
+
+out:
+   btrfs_release_path();
+   return ret;
+}
diff --git a/check/mode-common.h b/check/mode-common.h
index 6b05f8baf474..fda319267b04 100644
--- a/check/mode-common.h
+++ b/check/mode-common.h
@@ -121,5 +121,8 @@ void reset_cached_block_groups(struct btrfs_fs_info 
*fs_info);
 int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
 int exclude_metadata_blocks(struct btrfs_fs_info *fs_info);
 void cleanup_excluded_extents(struct btrfs_fs_info *fs_info);
-
+int delete_corrupted_dir_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_key *di_key, char *namebuf,
+ u32 namelen);
 #endif
diff --git a/check/mode-lowmem.c b/check/mode-lowmem.c
index 1bce44f5658a..fe3d5940faf0 100644
--- a/check/mode-lowmem.c
+++ b/check/mode-lowmem.c
@@ -1464,17 +1464,52 @@ out:
return ret;
 }
 
+/*
+ * A wrapper for delete_corrupted_dir_item(), with support part like
+ * start/commit transaction.
+ */
+static int lowmem_delete_corrupted_dir_item(struct btrfs_root *root,
+   struct btrfs_key *di_key,
+   char *namebuf, u32 name_len)
+{
+   struct btrfs_trans_handle *trans;
+   int ret;
+
+   trans = btrfs_start_transaction(root, 1);
+   if (IS_ERR(trans)) {
+   ret = PTR_ERR(trans);
+   error("failed to start transaction: %d", ret);
+   return ret;
+   }
+
+   ret = delete_corrupted_dir_item(trans, root, di_key, namebuf, name_len);
+   if (ret < 0) {
+   btrfs_abort_transaction(trans, ret);
+   } else {
+   ret = btrfs_commit_transaction(trans, root);
+   if (ret < 0)
+   error("failed to commit transaction: %d", ret);
+   }
+   return ret;
+}
 /*
  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
  *
  * Returns error after repair
  */
-static int repair_dir_item(struct btrfs_root *root,