Re: [PATCH 1/2] Btrfs: check_int: use the known block location
The xfstest btrfs/014 which tests the balance operation caused issues with the check_int module. The attempt was made to use btrfs_map_block() to find the physical location for a written block. However, this was not at all needed since the location of the written block was known since a hook to submit_bio() was the reason for entering the check_int module. Additionally, after a block relocation it happened that btrfs_map_block() failed causing misleading error messages afterwards. This patch changes the check_int module to use the known information of the physical location from the bio. Reported-by: Wang Shilong wangshilong1...@gmail.com Signed-off-by: Stefan Behrens sbehr...@giantdisaster.de This passed my Tests after applying both patches. Tested-by: Wang Shilong wangshilong1...@gmail.com --- fs/btrfs/check-integrity.c | 66 -- 1 file changed, 11 insertions(+), 55 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ce92ae30250f..65fc2e0bbc4a 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -326,9 +326,6 @@ static int btrfsic_handle_extent_data(struct btrfsic_state *state, static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, struct btrfsic_block_data_ctx *block_ctx_out, int mirror_num); -static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, - u32 len, struct block_device *bdev, - struct btrfsic_block_data_ctx *block_ctx_out); static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); static int btrfsic_read_block(struct btrfsic_state *state, struct btrfsic_block_data_ctx *block_ctx); @@ -1609,25 +1606,6 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, return ret; } -static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, - u32 len, struct block_device *bdev, - struct btrfsic_block_data_ctx *block_ctx_out) -{ - block_ctx_out-dev = btrfsic_dev_state_lookup(bdev); - 
block_ctx_out-dev_bytenr = bytenr; - block_ctx_out-start = bytenr; - block_ctx_out-len = len; - block_ctx_out-datav = NULL; - block_ctx_out-pagev = NULL; - block_ctx_out-mem_to_free = NULL; - if (NULL != block_ctx_out-dev) { - return 0; - } else { - printk(KERN_INFO btrfsic: error, cannot lookup dev (#2)!\n); - return -ENXIO; - } -} - static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) { if (block_ctx-mem_to_free) { @@ -2004,24 +1982,13 @@ again: } } - if (block-is_superblock) - ret = btrfsic_map_superblock(state, bytenr, - processed_len, - bdev, block_ctx); - else - ret = btrfsic_map_block(state, bytenr, processed_len, - block_ctx, 0); - if (ret) { - printk(KERN_INFO -btrfsic: btrfsic_map_block(root @%llu) - failed!\n, bytenr); - goto continue_loop; - } - block_ctx.datav = mapped_datav; - /* the following is required in case of writes to mirrors, - * use the same that was used for the lookup */ block_ctx.dev = dev_state; block_ctx.dev_bytenr = dev_bytenr; + block_ctx.start = bytenr; + block_ctx.len = processed_len; + block_ctx.pagev = NULL; + block_ctx.mem_to_free = NULL; + block_ctx.datav = mapped_datav; if (is_metadata || state-include_extent_data) { block-never_written = 0; @@ -2135,10 +2102,6 @@ again: /* this is getting ugly for the * include_extent_data case... */ bytenr = 0; /* unknown */ - block_ctx.start = bytenr; - block_ctx.len = processed_len; - block_ctx.mem_to_free = NULL; - block_ctx.pagev = NULL; } else { processed_len = state-metablock_size; bytenr = btrfs_stack_header_bytenr( @@ -2151,22 +2114,15 @@ again: Written block @%llu (%s/%llu/?) !found in hash table, M.\n, bytenr, dev_state-name, dev_bytenr); - - ret =
Re: [PATCH 2/2] Btrfs: check-int: don't complain about balanced blocks
The xfstest btrfs/014 which tests the balance operation caused that the check_int module complained that known blocks changed their physical location. Since this is not an error in this case, only print such message if the verbose mode was enabled. Reported-by: Wang Shilong wangshilong1...@gmail.com Signed-off-by: Stefan Behrens sbehr...@giantdisaster.de This passed my Tests after applying both patches. Tested-by: Wang Shilong wangshilong1...@gmail.com --- fs/btrfs/check-integrity.c | 87 ++ 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 65fc2e0bbc4a..65226d7c9fe0 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1325,24 +1325,28 @@ static int btrfsic_create_link_to_next_block( l = NULL; next_block-generation = BTRFSIC_GENERATION_UNKNOWN; } else { - if (next_block-logical_bytenr != next_bytenr - !(!next_block-is_metadata - 0 == next_block-logical_bytenr)) { - printk(KERN_INFO -Referenced block @%llu (%s/%llu/%d) - found in hash table, %c, - bytenr mismatch (!= stored %llu).\n, -next_bytenr, next_block_ctx-dev-name, -next_block_ctx-dev_bytenr, *mirror_nump, -btrfsic_get_block_type(state, next_block), -next_block-logical_bytenr); - } else if (state-print_mask BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO -Referenced block @%llu (%s/%llu/%d) - found in hash table, %c.\n, -next_bytenr, next_block_ctx-dev-name, -next_block_ctx-dev_bytenr, *mirror_nump, -btrfsic_get_block_type(state, next_block)); + if (state-print_mask BTRFSIC_PRINT_MASK_VERBOSE) { + if (next_block-logical_bytenr != next_bytenr + !(!next_block-is_metadata + 0 == next_block-logical_bytenr)) + printk(KERN_INFO +Referenced block @%llu (%s/%llu/%d) + found in hash table, %c, + bytenr mismatch (!= stored %llu).\n, +next_bytenr, next_block_ctx-dev-name, +next_block_ctx-dev_bytenr, *mirror_nump, +btrfsic_get_block_type(state, + next_block), +next_block-logical_bytenr); + else + printk(KERN_INFO +Referenced 
block @%llu (%s/%llu/%d) + found in hash table, %c.\n, +next_bytenr, next_block_ctx-dev-name, +next_block_ctx-dev_bytenr, *mirror_nump, +btrfsic_get_block_type(state, + next_block)); + } next_block-logical_bytenr = next_bytenr; next_block-mirror_num = *mirror_nump; @@ -1528,7 +1532,9 @@ static int btrfsic_handle_extent_data( return -1; } if (!block_was_created) { - if (next_block-logical_bytenr != next_bytenr + if ((state-print_mask + BTRFSIC_PRINT_MASK_VERBOSE) + next_block-logical_bytenr != next_bytenr !(!next_block-is_metadata 0 == next_block-logical_bytenr)) { printk(KERN_INFO @@ -1881,25 +1887,30 @@ again: dev_state, dev_bytenr); } - if (block-logical_bytenr != bytenr - !(!block-is_metadata - block-logical_bytenr == 0)) - printk(KERN_INFO -Written block @%llu (%s/%llu/%d) - found in hash table, %c, - bytenr mismatch - (!= stored %llu).\n, -bytenr, dev_state-name, dev_bytenr, -
Re: unexplainable corruptions 3.17.0
On Thu, Oct 16, 2014 at 11:17:26AM +0200, Tomasz Torcz wrote: Hi, Recently I've observed some corruptions to systemd's journal files which are somewhat puzzling. This is especially worrying as this is btrfs raid1 setup and I expected auto-healing. System details: 3.17.0-301.fc21.x86_64 btrfs: raid1 over 2x dm-crypted 6TB HDDs. mount opts: rw,relatime,seclabel,compress=lzo,space_cache Broken files are in /var/log/journal directory. This directory is set NOCOW with chattr, all the files within too. Example of broken file: system@0005057fe87730cf-6d3d85ed59bd70ae.journal~ When read with dd_rescue, there are many I/O errors reported, the summary looks like that (x = error): -..-..x---x.-..-..-...-..-..-...- 100% Reads with cat, hexdump fails with: read(4, 0x1001000, 65536) = -1 EIO (Input/output error) But btrfs dev stat reports no errors! $ btrfs dev stat . [/dev/dm-0].write_io_errs 0 [/dev/dm-0].read_io_errs 0 [/dev/dm-0].flush_io_errs 0 [/dev/dm-0].corruption_errs 0 [/dev/dm-0].generation_errs 0 [/dev/dm-1].write_io_errs 0 [/dev/dm-1].read_io_errs 0 [/dev/dm-1].flush_io_errs 0 [/dev/dm-1].corruption_errs 0 [/dev/dm-1].generation_errs 0 There are no hardware errors in dmesg. This is perplexing. How to find out what is causing the brokenness and how to avoid it in the future? Does scrub work for you? thanks, -liubo -- Tomasz .. oo o. oo o. .o .o o. o. oo o. .. Torcz.. .o .o .o .o oo oo .o .. .. oo oo o.o.o. .o .. o. o. o. o. o. o. oo .. .. o. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] Btrfs: check-int: don't complain about balanced blocks
On Thu, 16 Oct 2014 17:48:49 +0200, Stefan Behrens wrote: The xfstest btrfs/014 which tests the balance operation caused that the check_int module complained that known blocks changed their physical location. Since this is not an error in this case, only print such message if the verbose mode was enabled. Reported-by: Wang Shilong wangshilong1...@gmail.com Signed-off-by: Stefan Behrens sbehr...@giantdisaster.de --- fs/btrfs/check-integrity.c | 87 ++ 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 65fc2e0bbc4a..65226d7c9fe0 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1325,24 +1325,28 @@ static int btrfsic_create_link_to_next_block( l = NULL; next_block-generation = BTRFSIC_GENERATION_UNKNOWN; } else { - if (next_block-logical_bytenr != next_bytenr - !(!next_block-is_metadata - 0 == next_block-logical_bytenr)) { - printk(KERN_INFO -Referenced block @%llu (%s/%llu/%d) - found in hash table, %c, - bytenr mismatch (!= stored %llu).\n, -next_bytenr, next_block_ctx-dev-name, -next_block_ctx-dev_bytenr, *mirror_nump, -btrfsic_get_block_type(state, next_block), -next_block-logical_bytenr); - } else if (state-print_mask BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO -Referenced block @%llu (%s/%llu/%d) - found in hash table, %c.\n, -next_bytenr, next_block_ctx-dev-name, -next_block_ctx-dev_bytenr, *mirror_nump, -btrfsic_get_block_type(state, next_block)); + if (state-print_mask BTRFSIC_PRINT_MASK_VERBOSE) { + if (next_block-logical_bytenr != next_bytenr + !(!next_block-is_metadata + 0 == next_block-logical_bytenr)) + printk(KERN_INFO +Referenced block @%llu (%s/%llu/%d) + found in hash table, %c, + bytenr mismatch (!= stored %llu).\n, According to the coding style, we don't expect the user-visible strings are broken. 
Thanks Miao +next_bytenr, next_block_ctx-dev-name, +next_block_ctx-dev_bytenr, *mirror_nump, +btrfsic_get_block_type(state, + next_block), +next_block-logical_bytenr); + else + printk(KERN_INFO +Referenced block @%llu (%s/%llu/%d) + found in hash table, %c.\n, +next_bytenr, next_block_ctx-dev-name, +next_block_ctx-dev_bytenr, *mirror_nump, +btrfsic_get_block_type(state, + next_block)); + } next_block-logical_bytenr = next_bytenr; next_block-mirror_num = *mirror_nump; @@ -1528,7 +1532,9 @@ static int btrfsic_handle_extent_data( return -1; } if (!block_was_created) { - if (next_block-logical_bytenr != next_bytenr + if ((state-print_mask + BTRFSIC_PRINT_MASK_VERBOSE) + next_block-logical_bytenr != next_bytenr !(!next_block-is_metadata 0 == next_block-logical_bytenr)) { printk(KERN_INFO @@ -1881,25 +1887,30 @@ again: dev_state, dev_bytenr); } - if (block-logical_bytenr != bytenr - !(!block-is_metadata - block-logical_bytenr == 0)) - printk(KERN_INFO -Written block @%llu (%s/%llu/%d) - found in hash table, %c, - bytenr mismatch - (!= stored %llu).\n, -
Re: unexplainable corruptions 3.17.0
On Fri, Oct 17, 2014 at 04:02:03PM +0800, Liu Bo wrote: Recently I've observed some corruptions to systemd's journal files which are somewhat puzzling. This is especially worrying as this is btrfs raid1 setup and I expected auto-healing. System details: 3.17.0-301.fc21.x86_64 btrfs: raid1 over 2x dm-crypted 6TB HDDs. mount opts: rw,relatime,seclabel,compress=lzo,space_cache Reads with cat, hexdump fails with: read(4, 0x1001000, 65536) = -1 EIO (Input/output error) Does scrub work for you? As there seems to be no way to scrub individual files, I've started a scrub of the full volume. It will take some hours to finish. Meanwhile, could you satisfy my curiosity: what would scrub do that wouldn't be done by just reading the whole file? -- Tomasz Torcz, xmpp: zdzich...@chrome.pl -- Never underestimate the bandwidth of a station wagon filled with backup tapes. -- Jim Gray -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unexplainable corruptions 3.17.0
Am Donnerstag, 16. Oktober 2014, 11:17:26 schrieb Tomasz Torcz: Hi, Recently I've observed some corruptions to systemd's journal files which are somewhat puzzling. This is especially worrying as this is btrfs raid1 setup and I expected auto-healing. System details: 3.17.0-301.fc21.x86_64 btrfs: raid1 over 2x dm-crypted 6TB HDDs. mount opts: rw,relatime,seclabel,compress=lzo,space_cache Broken files are in /var/log/journal directory. This directory is set NOCOW with chattr, all the files within too. Example of broken file: system@0005057fe87730cf-6d3d85ed59bd70ae.journal~ When read with dd_rescue, there are many I/O errors reported, the summary looks like that (x = error): -..-..x---x.-..-..-...-..-..-...- 100% sounds like https://patchwork.kernel.org/patch/4929981/ to me. We urgently need some stable patches or people will quickly corrupt their filesystems. Marc signature.asc Description: This is a digitally signed message part.
Re: unexplainable corruptions 3.17.0
On Fri, Oct 17, 2014 at 10:10:09AM +0200, Tomasz Torcz wrote: On Fri, Oct 17, 2014 at 04:02:03PM +0800, Liu Bo wrote: Recently I've observed some corruptions to systemd's journal files which are somewhat puzzling. This is especially worrying as this is btrfs raid1 setup and I expected auto-healing. System details: 3.17.0-301.fc21.x86_64 btrfs: raid1 over 2x dm-crypted 6TB HDDs. mount opts: rw,relatime,seclabel,compress=lzo,space_cache Reads with cat, hexdump fails with: read(4, 0x1001000, 65536) = -1 EIO (Input/output error) Does scrub work for you? As there seem to be no way to scrub individual files, I've started scrub of full volume. It will take some hours to finish. Meanwhile, could you satisfy my curiosity what would scrub do that wouldn't be done by just reading the whole file? It checks both copies. Reading the file will only read one of the copies of any given block (so if that's good and the other copy is bad, it won't fix anything). Hugo. -- === Hugo Mills: hugo@... carfax.org.uk | darksatanic.net | lug.org.uk === PGP key: 65E74AC0 from wwwkeys.eu.pgp.net or http://www.carfax.org.uk --- The future isn't what it used to be. --- signature.asc Description: Digital signature
Re: unexplainable corruptions 3.17.0
On Fri, Oct 17, 2014 at 10:10:09AM +0200, Tomasz Torcz wrote: On Fri, Oct 17, 2014 at 04:02:03PM +0800, Liu Bo wrote: Recently I've observed some corruptions to systemd's journal files which are somewhat puzzling. This is especially worrying as this is btrfs raid1 setup and I expected auto-healing. System details: 3.17.0-301.fc21.x86_64 btrfs: raid1 over 2x dm-crypted 6TB HDDs. mount opts: rw,relatime,seclabel,compress=lzo,space_cache Reads with cat, hexdump fails with: read(4, 0x1001000, 65536) = -1 EIO (Input/output error) Does scrub work for you? As there seem to be no way to scrub individual files, I've started scrub of full volume. It will take some hours to finish. Meanwhile, could you satisfy my curiosity what would scrub do that wouldn't be done by just reading the whole file? (Hugo has answered that in this thread.) Well... I don't know exactly what the cause is, but as the file is NOCOW, its data is written in place. Have you experienced a hard reboot or something similar recently? And is there any message in the dmesg log when you get EIO while reading the file? thanks, -liubo -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unexplainable corruptions 3.17.0
On Fri, Oct 17, 2014 at 04:29:36PM +0800, Liu Bo wrote: On Fri, Oct 17, 2014 at 10:10:09AM +0200, Tomasz Torcz wrote: On Fri, Oct 17, 2014 at 04:02:03PM +0800, Liu Bo wrote: Recently I've observed some corruptions to systemd's journal files which are somewhat puzzling. This is especially worrying as this is btrfs raid1 setup and I expected auto-healing. read(4, 0x1001000, 65536) = -1 EIO (Input/output error) Well..I don't know exactly what's the cause, but as the file is NOCOW, it writes data in place, have you experienced a hard reboot or something recently? Nothing like that. The server is on a UPS; there were a couple of normal shutdowns this year (a few kernel upgrades). And any message in dmesg log while getting EIO by reading the file? Nothing in dmesg, no btrfs messages, no SCSI/SATA errors, nothing. That's why I find those corruptions mysterious. Maybe there is some way to inspect internal btrfs state and find out what is causing the problems? Or maybe this is related to the patch mentioned in this thread? -- Tomasz Torcz, xmpp: zdzich...@chrome.pl -- Never underestimate the bandwidth of a station wagon filled with backup tapes. -- Jim Gray -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PULL] Btrfs corruption fix
On 10/16/2014 03:51 AM, Chris Mason wrote: Hi Linus, I'm testing a pull with more fixes, but wanted to get this one out so Greg can pick it up. The corruption isn't easy to hit, you have to do a readonly snapshot and have orphans in the snapshot. But my review and testing missed the bug. Filipe has added a better xfstest to cover it. Please pull: git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git for-linus Sorry Linus, I realized this morning I forgot my sob. I've pushed out an updated commit to a new for-linus-update branch. The code and diffstat are exactly the same: git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git for-linus-update Chris Mason (1) commits (+33/-36): Revert Btrfs: race free update of commit root for ro snapshots Total: (1) commits (+33/-36) fs/btrfs/inode.c | 36 fs/btrfs/ioctl.c | 33 + 2 files changed, 33 insertions(+), 36 deletions(-) -chris -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unexplainable corruptions 3.17.0
Liu Bo posted on Fri, 17 Oct 2014 16:02:03 +0800 as excerpted: On Thu, Oct 16, 2014 at 11:17:26AM +0200, Tomasz Torcz wrote: Hi, Recently I've observed some corruptions to systemd's journal files which are somewhat puzzling. This is especially worrying as this is btrfs raid1 setup and I expected auto-healing. System details: 3.17.0-301.fc21.x86_64 btrfs: raid1 over 2x dm-crypted 6TB HDDs. mount opts: rw,relatime,seclabel,compress=lzo,space_cache Broken files are in /var/log/journal directory. This directory is set NOCOW with chattr, all the files within too. Does scrub work for you? NOCOW implies no checksum, so scrub shouldn't be able to help. Some time back people were reporting problems with corrupted journald journal files, but I've seen no such reports in a long time. This isn't likely much help for your (OP's) use-case, but FWIW, here's what I did with journald. When I switched to systemd here, I set it to volatile storage only, and kept syslog-ng setup for longer term storage. I arranged things so journald's volatile logs had enough room to grow for a normal single session in the /run/log tmpfs. That gives me the nice journald systemd integration, systemctl status reporting the last few log entries for a specific service, etc. But everything still gets passed to syslog-ng (which being on gentoo, I set the systemd USE flag for, so it integrates nicely) as well, and that spits out my normal text logs just as I had it setup to do long before systemd ever came along. It's those that I keep on non-volatile storage so they stick around thru a reboot, and they play nicely with btrfs so I've not had to worry about what journald's binary files might do. Btw, unless you have a need for relatime, noatime is strongly recommended for btrfs. -- Duncan - List replies preferred. No HTML msgs. Every nonfree program has a lord, a master -- and if you use the program, he is your master. 
Richard Stallman -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 2/2] Btrfs: check-int: don't complain about balanced blocks
The xfstest btrfs/014 which tests the balance operation caused that the check_int module complained that known blocks changed their physical location. Since this is not an error in this case, only print such message if the verbose mode was enabled. Reported-by: Wang Shilong wangshilong1...@gmail.com Signed-off-by: Stefan Behrens sbehr...@giantdisaster.de Tested-by: Wang Shilong wangshilong1...@gmail.com --- v1 - v2: Don't break user-visible strings. fs/btrfs/check-integrity.c | 80 -- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 65fc2e0bbc4a..1dcf66ad5b51 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1325,24 +1325,25 @@ static int btrfsic_create_link_to_next_block( l = NULL; next_block-generation = BTRFSIC_GENERATION_UNKNOWN; } else { - if (next_block-logical_bytenr != next_bytenr - !(!next_block-is_metadata - 0 == next_block-logical_bytenr)) { - printk(KERN_INFO - Referenced block @%llu (%s/%llu/%d) - found in hash table, %c, - bytenr mismatch (!= stored %llu).\n, - next_bytenr, next_block_ctx-dev-name, - next_block_ctx-dev_bytenr, *mirror_nump, - btrfsic_get_block_type(state, next_block), - next_block-logical_bytenr); - } else if (state-print_mask BTRFSIC_PRINT_MASK_VERBOSE) - printk(KERN_INFO - Referenced block @%llu (%s/%llu/%d) - found in hash table, %c.\n, - next_bytenr, next_block_ctx-dev-name, - next_block_ctx-dev_bytenr, *mirror_nump, - btrfsic_get_block_type(state, next_block)); + if (state-print_mask BTRFSIC_PRINT_MASK_VERBOSE) { + if (next_block-logical_bytenr != next_bytenr + !(!next_block-is_metadata + 0 == next_block-logical_bytenr)) + printk(KERN_INFO + Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n, + next_bytenr, next_block_ctx-dev-name, + next_block_ctx-dev_bytenr, *mirror_nump, + btrfsic_get_block_type(state, + next_block), + next_block-logical_bytenr); + else + printk(KERN_INFO + 
Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n, + next_bytenr, next_block_ctx-dev-name, + next_block_ctx-dev_bytenr, *mirror_nump, + btrfsic_get_block_type(state, + next_block)); + } next_block-logical_bytenr = next_bytenr; next_block-mirror_num = *mirror_nump; @@ -1528,7 +1529,9 @@ static int btrfsic_handle_extent_data( return -1; } if (!block_was_created) { - if (next_block-logical_bytenr != next_bytenr + if ((state-print_mask +BTRFSIC_PRINT_MASK_VERBOSE) + next_block-logical_bytenr != next_bytenr !(!next_block-is_metadata 0 == next_block-logical_bytenr)) { printk(KERN_INFO @@ -1881,25 +1884,26 @@ again: dev_state, dev_bytenr); } - if (block-logical_bytenr != bytenr - !(!block-is_metadata - block-logical_bytenr == 0)) - printk(KERN_INFO - Written block @%llu (%s/%llu/%d) - found in hash table, %c, - bytenr mismatch - (!= stored %llu).\n, - bytenr, dev_state-name, dev_bytenr, - block-mirror_num, -
Re: Poll: time to switch skinny-metadata on by default?
Hello, the core of skinny-metadata feature has been merged in 3.10 (Jun 2013) and has been reportedly used by many people. No major bugs were reported lately unless I missed them. so far I haven't succeeded running btrfs balance on a large skinny-metadata fs -- segfault, kernel bug, reproducible. No such problems on ^skinny-metadata fs (same disks, same data). Tried both several times on 3.17. More info in comments 10,14 in https://bugzilla.kernel.org/show_bug.cgi?id=64961 Regards, Petr -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unexplainable corruptions 3.17.0
On Fri, Oct 17, 2014 at 4:54 AM, Tomasz Torcz to...@pipebreaker.pl wrote: On Fri, Oct 17, 2014 at 04:29:36PM +0800, Liu Bo wrote: On Fri, Oct 17, 2014 at 10:10:09AM +0200, Tomasz Torcz wrote: On Fri, Oct 17, 2014 at 04:02:03PM +0800, Liu Bo wrote: Recently I've observed some corruptions to systemd's journal files which are somewhat puzzling. This is especially worrying as this is btrfs raid1 setup and I expected auto-healing. read(4, 0x1001000, 65536) = -1 EIO (Input/output error) Well..I don't know exactly what's the cause, but as the file is NOCOW, it writes data in place, have you experienced a hard reboot or something recently? Nothing like that. Server is on an UPS, there were couple normal shutdowns this year (few kernel upgrades). And any message in dmesg log while getting EIO by reading the file? Nothing in dmesg, no btrfs messages, no SCSI/SATA errors, nothing. That's why I find those corruptions mysterious. Maybe there is some way to inspect internal btrfs state and find out what causing the problems? Or maybe this is related to patch mentioned in this thread? This sounds like the problem fixed with some patches to our extent mapping code that went in with the merge window. I've cherry picked a few for stable and I'm running them through tests now. They are in my stable-3.17 branch, and I'll send to Greg once Linus grabs the revert for the last one. But, if you want to try that branch out, it may fix this EIO. Otherwise we'll start sending you debugging. -chris -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unexplainable corruptions 3.17.0
On Oct 16, 2014, at 5:17 AM, Tomasz Torcz to...@pipebreaker.pl wrote: Broken files are in /var/log/journal directory. This directory is set NOCOW with chattr, all the files within too. Example of broken file: system@0005057fe87730cf-6d3d85ed59bd70ae.journal~ What do you get for 'journalctl --verify' ? I'm curious if any journal files are considered corrupt by journalctl, and if there's parity between journalctl and dd_rescue when it comes to good/bad journals. When read with dd_rescue, there are many I/O errors reported, the summary looks like that (x = error): -..-..x---x.-..-..-...-..-..-...- 100% Reads with cat, hexdump fails with: read(4, 0x1001000, 65536) = -1 EIO (Input/output error) Yeah weird, I'd expect in any case that there'd be a kernel message, whether it's a Btrfs or hardware problem. Chris Murphy -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unexplainable corruptions 3.17.0
On Oct 17, 2014, at 7:38 AM, Duncan 1i5t5.dun...@cox.net wrote: When I switched to systemd here, I set it to volatile storage only, and kept syslog-ng setup for longer term storage. I arranged things so journald's volatile logs had enough room to grow for a normal single session in the /run/log tmpfs. That gives me the nice journald systemd integration, systemctl status reporting the last few log entries for a specific service, etc. But everything still gets passed to syslog-ng For the uninitiated: To do the above, delete /var/log/journal and install a syslog daemon of your choice (one that is systemd-journald compatible, of course). That's it. Once /var/log/journal is deleted, systemd-journald will write logs to /run/log/journal. Chris Murphy -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/1] btrfs-progs: introduce a proper structure on which cli will call register-device ioctl
On Thu, Oct 16, 2014 at 10:56:37PM +0800, Anand Jain wrote: Quite strange. I didn't see that problem here. Thanks for checking. Yeah, it must be some oddity in my setup. Wiping all the devices makes no difference and I don't see anything strange when scratch is mounted: # btrfs fi show -m Label: 'TestLabel.006' uuid: 831545ca-35de-4485-94f0-dae2bb9fb6a7 Total devices 4 FS bytes used 192.00KiB devid 1 size 10.00GiB used 20.00MiB path /dev/sda9 devid 2 size 10.00GiB used 256.00MiB path /dev/sda10 devid 3 size 10.00GiB used 0.00B path /dev/sda11 devid 4 size 10.00GiB used 0.00B path /dev/sda12 The output is the same without -m. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs-progs: check, ability to detect and fix outdated snapshot root items
This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots led to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essentially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs $seqres.full 21 _scratch_mount snapshot_cmd=$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT snapshot_cmd=$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\` run_check $FSSTRESS_PROG -p $procs \ -x $snapshot_cmd -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=1 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such a filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 3442, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. 
Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 3442, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 
0 Signed-off-by: Filipe Manana fdman...@suse.com --- cmds-check.c | 356 + disk-io.c | 2 + extent-tree.c | 22 +- tests/fsck-tests.sh| 15 +- tests/fsck-tests/006-bad_root_items_fs.tar.xz | Bin 0 - 24980 bytes .../fsck-tests/007-bad_root_items_fs_skinny.tar.xz | Bin 0 - 26520 bytes utils.c| 21 ++ utils.h| 2 + 8 files changed, 395 insertions(+), 23 deletions(-) create mode 100644 tests/fsck-tests/006-bad_root_items_fs.tar.xz create mode 100644 tests/fsck-tests/007-bad_root_items_fs_skinny.tar.xz diff --git a/cmds-check.c b/cmds-check.c index 310eb2a..2a5f823 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -7198,6 +7198,345 @@ static int fill_csum_tree(struct btrfs_trans_handle *trans, return ret; } +struct root_item_info { + /* level of the root */ + u8 level; + /* number of nodes at this level, must be 1 for a
Re: Random file system corruption in 3.17 (not BTRFS related...?)
On Wed, Oct 15, 2014 at 9:20 PM, Josef Bacik jba...@fb.com wrote: On 10/15/2014 03:30 PM, Rich Freeman wrote: On Wed, Oct 15, 2014 at 10:30 AM, Josef Bacik jba...@fb.com wrote: We've found it, the Fedora guys are reverting the bad patch now, we'll get the fix sent back to stable shortly. Sorry about that. After reverting this commit, can the bad snapshots be deleted/repaired/etc without wiping and restoring the entire filesystem? Copying 2.3TB of data isn't a particularly fast operation... I would certainly like to make fsck repair this sort of problem, let me reproduce the corruption locally and then make fsck fix it and then you can use that. Thanks, I just sent out a patch for fsck to fix this issue - i.e. bad read-only snapshots (inaccessible without errors, impossible to delete, etc). It fixes the snapshots if, and only if, you haven't run fsck in repair mode (--repair) before, as that would touch back references and other metadata, since it didn't expect root items to be incorrect (which is essentially what the snapshots bug caused). The patch is this one: https://patchwork.kernel.org/patch/5098331/ Also, if you have errors accessing files through a path that doesn't contain any of the read-only snapshots, it's possible that it's the corruption bug we had in 3.17 - bad extent map manipulation, that manifests itself in several ways (e.g. reports: http://www.spinics.net/lists/linux-btrfs/msg38045.html and http://www.spinics.net/lists/linux-btrfs/msg37567.html). Anyway, if you run into further issues, please report them. thanks Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- Filipe David Manana, Reasonable men adapt themselves to the world. Unreasonable men adapt the world to themselves. That's why all progress depends on unreasonable men. 
-- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] btrfs-progs: run fsck image tests in filename order
Signed-off-by: David Sterba dste...@suse.cz --- Based on Btrfs-progs: check, ability to detect and fix outdated snapshot root items tests/fsck-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fsck-tests.sh b/tests/fsck-tests.sh index 3f04626eda8c..8987d0442a6b 100644 --- a/tests/fsck-tests.sh +++ b/tests/fsck-tests.sh @@ -32,7 +32,7 @@ run_check make btrfs-corrupt-block # image (the backing file of a loop device, as a sparse file). The reason for # keeping some as tarballs of raw images is that for these cases btrfs-image # isn't able to preserve all the (bad) filesystem structure for some reason. -for i in $(find $here/tests/fsck-tests -name '*.img' -o -name '*.tar.xz') +for i in $(find $here/tests/fsck-tests -name '*.img' -o -name '*.tar.xz' | sort) do echo [TEST]$(basename $i) echo testing image $i $RESULT -- 2.1.1 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs-progs: check, ability to detect and fix outdated snapshot root items
On Fri, Oct 17, 2014 at 06:20:08PM +0100, Filipe Manana wrote: This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots led to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. Thanks, this is going to 3.17. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Ccache on btrfs
Hi, Is there an issue with btrfs, ccache and multiple threads during compilation? I'm experiencing kernel panics while building the Linux kernel on a btrfs system with -j8 and ccache enabled. Google suggests there was some issue on earlier kernels, but my kernel is 3.16+. Can anyone confirm? Thanks Leonidas -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unexplainable corruptions 3.17.0
On Fri, Oct 17, 2014 at 8:53 AM, Chris Mason c...@fb.com wrote: This sounds like the problem fixed with some patches to our extent mapping code that went in with the merge window. I've cherry picked a few for stable and I'm running them through tests now. They are in my stable-3.17 branch, and I'll send to Greg once Linus grabs the revert for the last one. Just for clarity - when can we expect to see these in the kernel? I wasn't sure which merge window you're referring to. I take it that 3.17.1 is still unpatched (for this and the readonly snapshot issue - which requires reverting 9c3b306e1c9e6be4be09e99a8fe2227d1005effc). -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Poll: time to switch skinny-metadata on by default?
On 10/17/2014 08:30 AM, Petr Janecek wrote: Hello, the core of skinny-metadata feature has been merged in 3.10 (Jun 2013) and has been reportedly used by many people. No major bugs were reported lately unless I missed them. so far I haven't succeeded running btrfs balance on a large skinny-metadata fs -- segfault, kernel bug, reproducible. No such problems on ^skinny-metadata fs (same disks, same data). Tried both several times on 3.17. More info in comments 10,14 in https://urldefense.proofpoint.com/v1/url?u=https://bugzilla.kernel.org/show_bug.cgi?id%3D64961k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0Ar=cKCbChRKsMpTX8ybrSkonQ%3D%3D%0Am=3qxE39iiu%2BoZB%2F05dE7hnGHZojWhjjijrtjNYki0NFg%3D%0As=b262347a1ad2505ebdcb21dcc9f0944a14c174a1dcf447746ce196faddd99092 I can't reproduce this, how big is your home directory, and are you still seeing corruptions after just rsyncing to a clean fs? Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs soft lockups: locks gone crazy
ping? On Tue, 2014-10-14 at 00:27 -0700, Davidlohr Bueso wrote: Hello, I'm getting massive amounts of cpu soft lockups in Linus's tree for today. This occurs almost immediately and is very reproducible in aim7 disk workloads using btrfs: kernel:[ 559.800017] NMI watchdog: BUG: soft lockup - CPU#114 stuck for 22s! [reaim:44435] ... [ 999.800070] Modules linked in: iptable_filter(E) ip_tables(E) x_tables(E) rpcsec_gss_krb5(E) auth_rpcgss(E) nfsv4(E) dns_resolver(E) nfs(E) lockd(E) grace(E) sunrpc(E) fscache(E) iscsi_ibft(E) iscsi_boot_sysfs(E) af_packet(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) ext4(E) crc16(E) mbcache(E) coretemp(E) jbd2(E) kvm_intel(E) kvm(E) crct10dif_pclmul(E) crc32_pclmul(E) iTCO_wdt(E) sb_edac(E) iTCO_vendor_support(E) joydev(E) ghash_clmulni_intel(E) aesni_intel(E) aes_x86_64(E) lrw(E) gf128mul(E) glue_helper(E) ablk_helper(E) cryptd(E) pcspkr(E) edac_core(E) i2c_i801(E) lpc_ich(E) mfd_core(E) ipmi_si(E) ipmi_msghandler(E) wmi(E) processor(E) shpchp(E) acpi_pad(E) button(E) dm_mod(E) btrfs(E) xor(E) raid6_pq(E) sd_mod(E) hid_generic(E) usbhid(E) sr_mod(E) cdrom(E) mgag200(E) syscopyarea(E) sysfillrect(E) ehci_pci(E) sysimgblt(E) i2c_algo_bit(E) drm_kms_helper(E) ixgbe(E) ahci(E) ehci_hcd(E) ttm(E) mdio(E) libahci(E) ptp(E) crc32c_intel(E) mpt2sas(E) pps_core(E) usbcore(E) drm(E) libata(E) raid_class(E) usb_common(E) dca(E) scsi_transport_sas(E) sg(E) scsi_mod(E) autofs4(E) [ 999.800070] CPU: 53 PID: 1027 Comm: kworker/u292:3 Tainted: G EL 3.17.0-3-default+ #2 [ 999.800070] Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BIVTSDP1.86B.0044.R01.1310221705 10/22/2013 [ 999.800070] Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs] [ 999.800070] task: 882054e76190 ti: 882054e78000 task.ti: 882054e78000 [ 999.800070] RIP: 0010:[810a2e46] [810a2e46] queue_read_lock_slowpath+0x86/0x90 [ 999.800070] RSP: 0018:882054e7ba00 EFLAGS: 0216 [ 999.800070] RAX: 8a64 RBX: 882054e7b9b0 RCX: 8a70 [ 999.800070] RDX: 8a70 RSI: 00c0 RDI: 
881833b49ea0 [ 999.800070] RBP: 882054e7ba00 R08: 881833b49e64 R09: 0001 [ 999.800070] R10: R11: R12: 881047377f00 [ 999.800070] R13: 0007 R14: 88203bf6d520 R15: 8810 [ 999.800070] FS: () GS:88207f50() knlGS: [ 999.800070] CS: 0010 DS: ES: CR0: 80050033 [ 999.800070] CR2: 006a7f80 CR3: 01a14000 CR4: 001407e0 [ 999.800070] Stack: [ 999.800070] 882054e7ba10 81573cfc 882054e7ba70 a033349b [ 999.800070] 882038e9ac80 88203b58fd00 882038cd4ed8 [ 999.800070] 882054e7ba68 881833b49e30 882057fb4800 0001 [ 999.800070] Call Trace: [ 999.800070] [81573cfc] _raw_read_lock+0x1c/0x30 [ 999.800070] [a033349b] btrfs_tree_read_lock+0x5b/0x120 [btrfs] [ 999.800070] [a02d1f6b] btrfs_read_lock_root_node+0x3b/0x50 [btrfs] [ 999.800070] [a02d726a] btrfs_search_slot+0x53a/0xab0 [btrfs] [ 999.800070] [812c84c7] ? cpumask_next_and+0x37/0x50 [ 999.800070] [a02eebf7] btrfs_lookup_file_extent+0x37/0x40 [btrfs] [ 999.800070] [a030e1ca] __btrfs_drop_extents+0x16a/0xdb0 [btrfs] [ 999.800070] [a03170cc] ? __set_extent_bit+0x22c/0x550 [btrfs] [ 999.800070] [a02d1cea] ? btrfs_alloc_path+0x1a/0x20 [btrfs] [ 999.800070] [a02fe26e] insert_reserved_file_extent.constprop.58+0x9e/0x2f0 [btrfs] [ 999.800070] [a0303ff5] btrfs_finish_ordered_io+0x2e5/0x600 [btrfs] ... 
[ 1011.800059] CPU: 2 PID: 1 Comm: systemd Tainted: GEL 3.17.0-3-default+ #2 [ 1011.800059] Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BIVTSDP1.86B.0044.R01.1310221705 10/22/2013 [ 1011.800059] task: 880857eb8010 ti: 880857ebc000 task.ti: 880857ebc000 [ 1011.800059] RIP: 0010:[810a2e46] [810a2e46] queue_read_lock_slowpath+0x86/0x90 [ 1011.800059] RSP: 0018:880857ebfce0 EFLAGS: 0202 [ 1011.800059] RAX: 8a64 RBX: 8808549cd910 RCX: 8a74 [ 1011.800059] RDX: 8a74 RSI: 00c4 RDI: 881833b49ea0 [ 1011.800059] RBP: 880857ebfce0 R08: 881833b49e64 R09: [ 1011.800059] R10: e8e7ffc4ed60 R11: R12: 8808549cd900 [ 1011.800059] R13: R14: R15: 880855e931e0 [ 1011.800059] FS: 7f12cb6ec880() GS:88087f84() knlGS: [ 1011.800059] CS: 0010 DS: ES: CR0: 80050033 [ 1011.800059] CR2: 7f12cb707000 CR3: 002054f16000 CR4: 001407e0 [ 1011.800059]
Re: Ccache on btrfs
On 17/10/14, Timofey Titovets wrote: i use ccache and often compile linux from git, I don't catch any errors with it. Interesting, I re-enabled ccache on an ext4 partition (HDD) and it worked fine. So I suspect it's something to do with btrfs and SSD. My mount options are: /dev/sda3 on /home type btrfs (rw,relatime,ssd,discard,nospace_cache) The problem is that I can't get hold of the kernel panic since it's not in the journal [1]. I don't know a way to get hold of the kernel panic log to help anyway. What would be useful to debug it? [1] using systemd Leonidas -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs soft lockups: locks gone crazy
On 10/14/2014 03:27 AM, Davidlohr Bueso wrote: Hello, I'm getting massive amounts of cpu soft lockups in Linus's tree for today. This occurs almost immediately and is very reproducible in aim7 disk workloads using btrfs: I'm trying to reproduce but it's not popping for me. What is the setup of your fs? mkfs options, mount options etc. And how are you running aim7? I'm using reaim with the default reaim.config and workfile,disk, is this what you are using? If not please attach your config and workfile so I can be sure to be doing the same thing as you. Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs soft lockups: locks gone crazy
On Fri, 2014-10-17 at 15:33 -0400, Josef Bacik wrote: On 10/14/2014 03:27 AM, Davidlohr Bueso wrote: Hello, I'm getting massive amounts of cpu soft lockups in Linus's tree for today. This occurs almost immediately and is very reproducible in aim7 disk workloads using btrfs: I'm trying to reproduce but it's not popping for me. What is the setup of your fs? mkfs options, mount options etc. And how are you running aim7? I'm using reaim with the default reaim.config and workfile,disk, is this what you are using? If not please attach your config and workfile so I can be sure to be doing the same thing as you. Thanks, The steps I used are: Download mmtests: https://github.com/gormanm/mmtests.git cp configs/config-global-dhp__reaim-io config edit the new config and just leave 'workfile.shared workfile.disk' workloads as the REAIM_WORKFILES parameter. ./run-mmtests --no-monitor testname Just a few mins into the test you should start getting the lockups. fwiw I have not had the time to try other setups of reaim that are not bundled with mmtests. Thanks, Davidlohr -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Ccache on btrfs
On 10/17/2014 03:24 PM, Leonidas Spyropoulos wrote: On 17/10/14, Timofey Titovets wrote: i use ccache and often compile linux from git, I don't catch any errors with it. Interesting, I re-enabled ccache on an ext4 partition (HDD) and it worked fine. So I suspect it's something to do with btrfs and SSD. My mount options are: /dev/sda3 on /home type btrfs (rw,relatime,ssd,discard,nospace_cache) The problem is that I can't get hold of the kernel panic since it's not in the journal [1]. I don't know a way to get hold of the kernel panic log to help anyway. What would be useful to debug it? Setup netconsole, you can set it to go to a different box and then on that box run nc -lu -p 514 out.txt and then reproduce the panic. If you are using fedora you can install netconsole and edit /etc/sysconfig/netconsole and set SYSLOGADDR=your other box's address. Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
raid10 drive replacement
Hi! I have a faulty drive in my raid10 and want to replace it. The working drives are xvd[bef] and the replacement drive is xvdc. When I mount my drive in RW: #mount -odegraded /dev/xvdb /tank #dmesg -c [ 6207.294513] btrfs: device fsid 728ef4d8-928c-435c-b707-f71c459e1520 devid 1 transid 551398 /dev/xvdb [ 6207.327357] btrfs: allowing degraded mounts [ 6207.477041] btrfs: bdev (null) errs: wr 15211054, rd 3038899, flush 0, corrupt 0, gen 0 [ 6219.703606] Btrfs: too many missing devices, writeable mount is not allowed [ 6219.785929] btrfs: open_ctree failed When I mount my drive in RO: #mount -odegraded,ro /dev/xvdb /tank #btrfs filesystem show Label: none uuid: 728ef4d8-928c-435c-b707-f71c459e1520 Total devices 4 FS bytes used 4.70TiB devid1 size 2.73TiB used 2.73TiB path /dev/xvdb devid2 size 2.73TiB used 2.73TiB path devid3 size 2.73TiB used 2.73TiB path /dev/xvde devid4 size 2.73TiB used 2.73TiB path /dev/xvdf Btrfs v3.12 Of course, because my mount is RO, I can't add a device and do a balance: #btrfs device add /dev/xvdc /tank ERROR: error adding the device '/dev/xvdc' - Read-only file system Nor can I do a replace without the disk: #btrfs replace start -Br 2 /dev/xvdc /tank ERROR: ioctl(DEV_REPLACE_START) failed on /tank: Read-only file system, no error FYI I'm on ubuntu 14.04 with btrfs 3.12 Any idea? Thanks! -- fensoft -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: raid10 drive replacement
On 2014-10-17 18:47, Vincent. wrote: Hi ! I have a faulty drive in my raid10 and want it to be replaced. Working drive are xvd[bef] and replacement drive is xvdc. This is something I ran into the other day. Key difference is that I was running 3.17.1 kernel and 3.16 btrfs-progs When I mount my drive in RW: #mount -odegraded /dev/xvdb /tank #dmesg -c [ 6207.294513] btrfs: device fsid 728ef4d8-928c-435c-b707-f71c459e1520 devid 1 transid 551398 /dev/xvdb [ 6207.327357] btrfs: allowing degraded mounts [ 6207.477041] btrfs: bdev (null) errs: wr 15211054, rd 3038899, flush 0, corrupt 0, gen 0 [ 6219.703606] Btrfs: too many missing devices, writeable mount is not allowed [ 6219.785929] btrfs: open_ctree failed In my case, I was able to rw mount. May be update btrfs-progs and retry? When I mount my drive in RO: #mount -odegraded,ro /dev/xvdb /tank #btrfs filesystem show Label: none uuid: 728ef4d8-928c-435c-b707-f71c459e1520 Total devices 4 FS bytes used 4.70TiB devid1 size 2.73TiB used 2.73TiB path /dev/xvdb devid2 size 2.73TiB used 2.73TiB path devid3 size 2.73TiB used 2.73TiB path /dev/xvde devid4 size 2.73TiB used 2.73TiB path /dev/xvdf Btrfs v3.12 Of course, because my mount is in RO, i can't add device and do a balance: #btrfs device add /dev/xvdc /tank ERROR: error adding the device '/dev/xvdc' - Read-only file system Neither a replace without disk: #btrfs replace start -Br 2 /dev/xvdc /tank ERROR: ioctl(DEV_REPLACE_START) failed on /tank: Read-only file system, no error Because my mount was rw, replace worked. FYI I'm on ubuntu 14.04 with btrfs 3.12 Any idea ? Thanks ! -- fensoft -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
strange 3.16.3 problem
I have a system running the Debian 3.16.3-2 AMD64 kernel for the Xen Dom0 and the DomUs. The Dom0 has a pair of 500G SATA disks in a BTRFS RAID-1 array. The RAID-1 array has some subvols exported by NFS as well as a subvol for the disk images for the DomUs - I am not using NoCOW as performance is fine without it and I like having checksums on everything. I have started having some problems with a mail server that is running in a DomU. The mail server has 32bit user-space because it was copied from a 32bit system and I had no reason to upgrade it to 64bit, but it's running a 64bit kernel so I don't think that 32bit user-space is related to my problem. # find . -name *546 ./1412233213.M638209P10546 # ls -l ./1412233213.M638209P10546 ls: cannot access ./1412233213.M638209P10546: No such file or directory Above is the problem, find says that the file in question exists but ls doesn't think so, the file in question is part of a Maildir spool that's NFS mounted. This problem persisted across a reboot of the DomU, so it's a problem with the Dom0 (the NFS server). The dmesg output on the Dom0 doesn't appear to have anything relevant, and a find command doesn't find the file. I don't know if this is an NFS problem or a BTRFS problem. I haven't rebooted the Dom0 yet because a remote reboot of a server running a kernel from Debian/Unstable is something I try to avoid. Any suggestions? -- My Main Blog http://etbe.coker.com.au/ My Documents Blog http://doc.coker.com.au/ -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: raid10 drive replacement
somethings aren't matching well. the issue is.. [ 6219.703606] Btrfs: too many missing devices, writeable mount is not allowed But per Vincent only xvdc is missing in a raid10 (both data and metadata are raid10 ?) Anand On 10/18/14 10:02, Suman Chakravartula wrote: On 2014-10-17 18:47, Vincent. wrote: Hi ! I have a faulty drive in my raid10 and want it to be replaced. Working drive are xvd[bef] and replacement drive is xvdc. This is something I ran into the other day. Key difference is that I was running 3.17.1 kernel and 3.16 btrfs-progs When I mount my drive in RW: #mount -odegraded /dev/xvdb /tank #dmesg -c [ 6207.294513] btrfs: device fsid 728ef4d8-928c-435c-b707-f71c459e1520 devid 1 transid 551398 /dev/xvdb [ 6207.327357] btrfs: allowing degraded mounts [ 6207.477041] btrfs: bdev (null) errs: wr 15211054, rd 3038899, flush 0, corrupt 0, gen 0 [ 6219.703606] Btrfs: too many missing devices, writeable mount is not allowed [ 6219.785929] btrfs: open_ctree failed In my case, I was able to rw mount. May be update btrfs-progs and retry? When I mount my drive in RO: #mount -odegraded,ro /dev/xvdb /tank #btrfs filesystem show Label: none uuid: 728ef4d8-928c-435c-b707-f71c459e1520 Total devices 4 FS bytes used 4.70TiB devid1 size 2.73TiB used 2.73TiB path /dev/xvdb devid2 size 2.73TiB used 2.73TiB path devid3 size 2.73TiB used 2.73TiB path /dev/xvde devid4 size 2.73TiB used 2.73TiB path /dev/xvdf Btrfs v3.12 Of course, because my mount is in RO, i can't add device and do a balance: #btrfs device add /dev/xvdc /tank ERROR: error adding the device '/dev/xvdc' - Read-only file system Neither a replace without disk: #btrfs replace start -Br 2 /dev/xvdc /tank ERROR: ioctl(DEV_REPLACE_START) failed on /tank: Read-only file system, no error Because my mount was rw, replace worked. FYI I'm on ubuntu 14.04 with btrfs 3.12 Any idea ? Thanks ! 
-- fensoft -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html