Re: Regression in 2.6.35 RC1 (Ubuntu 2.6.35-1-generic)

2010-06-10 Thread Yan, Zheng
On Fri, Jun 11, 2010 at 12:54 PM, Brad Figg  wrote:
> I'm seeing the following in /var/log/messages. The system is not under
> any particular load. The system has my /home as a btrfs partition. The
> system has GDM up and I've logged in via ssh.
>
>
> Jun 10 21:22:43 bradf-x301 kernel: [  175.563934] CPU 1
> Jun 10 21:22:43 bradf-x301 kernel: [  175.563937] Modules linked in: cryptd
> aes_x86_64 aes_generic rfcomm binfmt_misc sco ppdev bridge stp bnep l2cap
> dm_crypt snd_hda_codec_conexant snd_hda_intel snd_hda_codec snd_hwdep
> snd_pcm thinkpad_acpi snd_seq_midi snd_rawmidi snd_seq_midi_event arc4
> snd_seq snd_timer snd_seq_device uvcvideo lp iwlagn videodev parport iwlcore
> btusb mac80211 v4l1_compat snd cfg80211 bluetooth joydev soundcore
> v4l2_compat_ioctl32 psmouse tpm_tis tpm serio_raw tpm_bios led_class nvram
> snd_page_alloc btrfs zlib_deflate crc32c libcrc32c vga16fb vgastate usbhid
> hid i915 drm_kms_helper drm ahci i2c_algo_bit video e1000e output libahci
> intel_agp
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564051]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564059] Pid: 936, comm:
> btrfs-endio-wri Tainted: G        W   2.6.35-1-generic #1-Ubuntu
> 2777MSU/2777MSU
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564066] RIP:
> 0010:[]  []
> btrfs_free_tree_block+0x3bb/0x3e0 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564102] RSP: 0018:8801282638f0
>  EFLAGS: 00010287
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564108] RAX: 8801326c5b00 RBX:
> 88013279e800 RCX: 8801326c5300
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564114] RDX: 000181ff RSI:
> 00014000 RDI: 8801304dc070
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564121] RBP: 880128263940 R08:
>  R09: 0007
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564126] R10: 0001 R11:
>  R12: 880123dba7e0
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564132] R13: 8801304dc128 R14:
> 880123dbd000 R15: 8801326c5300
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564139] FS:
>  () GS:880001e8() knlGS:
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564146] CS:  0010 DS:  ES:
>  CR0: 8005003b
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564152] CR2: 7f1a263e4e40 CR3:
> 01a2a000 CR4: 06e0
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564158] DR0:  DR1:
>  DR2: 
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564164] DR3:  DR6:
> 0ff0 DR7: 0400
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564171] Process btrfs-endio-wri
> (pid: 936, threadinfo 880128262000, task 880128635b40)
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564179]  88010002
>  880130b301b8 b242
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564189] <0> 880128263940
> 88013081c630 880123dba900 88013279e800
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564199] <0> 880123dba7e0
> 880123dbd000 8801282639f0 a014992b
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564234]  []
> __btrfs_cow_block+0x3bb/0x620 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564257]  []
> btrfs_cow_block+0x107/0x1f0 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564280]  []
> btrfs_search_slot+0x37e/0x6b0 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564308]  []
> btrfs_lookup_csum+0x6d/0x160 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564321]  [] ?
> kmem_cache_alloc+0xe5/0x140
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564347]  []
> btrfs_csum_file_blocks+0xd9/0x850 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564380]  [] ?
> merge_state+0x7e/0x150 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564411]  [] ?
> free_extent_state+0x37/0x60 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564442]  []
> add_pending_csums+0x49/0x70 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564471]  []
> btrfs_finish_ordered_io+0x1a3/0x2a0 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564482]  [] ?
> test_clear_page_writeback+0x8d/0x150
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564512]  []
> btrfs_writepage_end_io_hook+0x1a/0x20 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564542]  []
> end_bio_extent_writepage+0x13b/0x180 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564553]  [] ?
> schedule_timeout+0x19d/0x310
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564564]  [] ?
> process_timeout+0x0/0x10
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564573]  []
> bio_endio+0x1d/0x40
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564601]  []
> end_workqueue_fn+0xfc/0x130 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564629]  []
> worker_loop+0x15c/0x4c0 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564657]  [] ?
> worker_loop+0x0/0x4c0 [btrfs]
> Jun 10 21:22:43 bradf-x301 kernel: [  175.564668]  []
> kthread+0x96/0xa0
> Jun 10 21:22

Regression in 2.6.35 RC1 (Ubuntu 2.6.35-1-generic)

2010-06-10 Thread Brad Figg

I'm seeing the following in /var/log/messages. The system is not under
any particular load. The system has my /home as a btrfs partition. The
system has GDM up and I've logged in via ssh.


Jun 10 21:22:43 bradf-x301 kernel: [  175.563934] CPU 1
Jun 10 21:22:43 bradf-x301 kernel: [  175.563937] Modules linked in: cryptd aes_x86_64 aes_generic rfcomm binfmt_misc sco ppdev bridge stp bnep l2cap dm_crypt snd_hda_codec_conexant snd_hda_intel snd_hda_codec snd_hwdep snd_pcm thinkpad_acpi snd_seq_midi 
snd_rawmidi snd_seq_midi_event arc4 snd_seq snd_timer snd_seq_device uvcvideo lp iwlagn videodev parport iwlcore btusb mac80211 v4l1_compat snd cfg80211 bluetooth joydev soundcore v4l2_compat_ioctl32 psmouse tpm_tis tpm serio_raw tpm_bios led_class nvram 
snd_page_alloc btrfs zlib_deflate crc32c libcrc32c vga16fb vgastate usbhid hid i915 drm_kms_helper drm ahci i2c_algo_bit video e1000e output libahci intel_agp

Jun 10 21:22:43 bradf-x301 kernel: [  175.564051]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564059] Pid: 936, comm: 
btrfs-endio-wri Tainted: GW   2.6.35-1-generic #1-Ubuntu 2777MSU/2777MSU
Jun 10 21:22:43 bradf-x301 kernel: [  175.564066] RIP: 0010:[]  
[] btrfs_free_tree_block+0x3bb/0x3e0 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564102] RSP: 0018:8801282638f0  
EFLAGS: 00010287
Jun 10 21:22:43 bradf-x301 kernel: [  175.564108] RAX: 8801326c5b00 RBX: 
88013279e800 RCX: 8801326c5300
Jun 10 21:22:43 bradf-x301 kernel: [  175.564114] RDX: 000181ff RSI: 
00014000 RDI: 8801304dc070
Jun 10 21:22:43 bradf-x301 kernel: [  175.564121] RBP: 880128263940 R08: 
 R09: 0007
Jun 10 21:22:43 bradf-x301 kernel: [  175.564126] R10: 0001 R11: 
 R12: 880123dba7e0
Jun 10 21:22:43 bradf-x301 kernel: [  175.564132] R13: 8801304dc128 R14: 
880123dbd000 R15: 8801326c5300
Jun 10 21:22:43 bradf-x301 kernel: [  175.564139] FS:  () 
GS:880001e8() knlGS:
Jun 10 21:22:43 bradf-x301 kernel: [  175.564146] CS:  0010 DS:  ES:  
CR0: 8005003b
Jun 10 21:22:43 bradf-x301 kernel: [  175.564152] CR2: 7f1a263e4e40 CR3: 
01a2a000 CR4: 06e0
Jun 10 21:22:43 bradf-x301 kernel: [  175.564158] DR0:  DR1: 
 DR2: 
Jun 10 21:22:43 bradf-x301 kernel: [  175.564164] DR3:  DR6: 
0ff0 DR7: 0400
Jun 10 21:22:43 bradf-x301 kernel: [  175.564171] Process btrfs-endio-wri (pid: 
936, threadinfo 880128262000, task 880128635b40)
Jun 10 21:22:43 bradf-x301 kernel: [  175.564179]  88010002 
 880130b301b8 b242
Jun 10 21:22:43 bradf-x301 kernel: [  175.564189] <0> 880128263940 
88013081c630 880123dba900 88013279e800
Jun 10 21:22:43 bradf-x301 kernel: [  175.564199] <0> 880123dba7e0 
880123dbd000 8801282639f0 a014992b
Jun 10 21:22:43 bradf-x301 kernel: [  175.564234]  [] 
__btrfs_cow_block+0x3bb/0x620 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564257]  [] 
btrfs_cow_block+0x107/0x1f0 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564280]  [] 
btrfs_search_slot+0x37e/0x6b0 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564308]  [] 
btrfs_lookup_csum+0x6d/0x160 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564321]  [] ? 
kmem_cache_alloc+0xe5/0x140
Jun 10 21:22:43 bradf-x301 kernel: [  175.564347]  [] 
btrfs_csum_file_blocks+0xd9/0x850 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564380]  [] ? 
merge_state+0x7e/0x150 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564411]  [] ? 
free_extent_state+0x37/0x60 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564442]  [] 
add_pending_csums+0x49/0x70 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564471]  [] 
btrfs_finish_ordered_io+0x1a3/0x2a0 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564482]  [] ? 
test_clear_page_writeback+0x8d/0x150
Jun 10 21:22:43 bradf-x301 kernel: [  175.564512]  [] 
btrfs_writepage_end_io_hook+0x1a/0x20 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564542]  [] 
end_bio_extent_writepage+0x13b/0x180 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564553]  [] ? 
schedule_timeout+0x19d/0x310
Jun 10 21:22:43 bradf-x301 kernel: [  175.564564]  [] ? 
process_timeout+0x0/0x10
Jun 10 21:22:43 bradf-x301 kernel: [  175.564573]  [] 
bio_endio+0x1d/0x40
Jun 10 21:22:43 bradf-x301 kernel: [  175.564601]  [] 
end_workqueue_fn+0xfc/0x130 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564629]  [] 
worker_loop+0x15c/0x4c0 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564657]  [] ? 
worker_loop+0x0/0x4c0 [btrfs]
Jun 10 21:22:43 bradf-x301 kernel: [  175.564668]  [] 
kthread+0x96/0xa0
Jun 10 21:22:43 bradf-x301 kernel: [  175.564677]  [] 
kernel_thread_helper+0x4/0x10
Jun 10 21:22:43 bradf-x301 kernel: [  175.564686]  [] ? 
kthread+0x0/0xa0
Jun 10 21:22:43 bradf-x301 kernel: [  175.564693] 

Adding to multi-device raid filesystems

2010-06-10 Thread Adrian -

Dear all,



What raid policy does a subsequent balance operation follow

when adding a new device to an existing -d raid0 filesystem

initially created on two devices? What if the initial creation

was raid1?



In particular, if I have two 1TB drives that I initially stripe,

can I subsequently mirror them onto a 2TB drive that I add

later on? Can I recover if any single drive fails?



Thank you.
  
_
Look 'em in the eye: FREE Messenger video chat
http://go.microsoft.com/?linkid=9734386--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs: hanging processes - race condition?

2010-06-10 Thread Yan, Zheng
On Fri, Jun 11, 2010 at 9:12 AM, Shaohua Li  wrote:
> On Fri, Jun 11, 2010 at 01:41:41AM +0800, Jerome Ibanes wrote:
>> List,
>>
>> I ran into a hang issue (race condition: cpu is high when the server is
>> idle, meaning that btrfs is hanging, and IOwait is high as well) running
>> 2.6.34 on debian/lenny on a x86_64 server (dual Opteron 275 w/ 16GB ram).
>> The btrfs filesystem live on 18x300GB scsi spindles, configured as Raid-0,
>> as shown below:
>>
>> Label: none  uuid: bc6442c6-2fe2-4236-a5aa-6b7841234c52
>>          Total devices 18 FS bytes used 2.94TB
>>          devid    5 size 279.39GB used 208.33GB path /dev/cciss/c1d0
>>          devid   17 size 279.39GB used 208.34GB path /dev/cciss/c1d8
>>          devid   16 size 279.39GB used 209.33GB path /dev/cciss/c1d7
>>          devid    4 size 279.39GB used 208.33GB path /dev/cciss/c0d4
>>          devid    1 size 279.39GB used 233.72GB path /dev/cciss/c0d1
>>          devid   13 size 279.39GB used 208.33GB path /dev/cciss/c1d4
>>          devid    8 size 279.39GB used 208.33GB path /dev/cciss/c1d11
>>          devid   12 size 279.39GB used 208.33GB path /dev/cciss/c1d3
>>          devid    3 size 279.39GB used 208.33GB path /dev/cciss/c0d3
>>          devid    9 size 279.39GB used 208.33GB path /dev/cciss/c1d12
>>          devid    6 size 279.39GB used 208.33GB path /dev/cciss/c1d1
>>          devid   11 size 279.39GB used 208.33GB path /dev/cciss/c1d2
>>          devid   14 size 279.39GB used 208.33GB path /dev/cciss/c1d5
>>          devid    2 size 279.39GB used 233.70GB path /dev/cciss/c0d2
>>          devid   15 size 279.39GB used 209.33GB path /dev/cciss/c1d6
>>          devid   10 size 279.39GB used 208.33GB path /dev/cciss/c1d13
>>          devid    7 size 279.39GB used 208.33GB path /dev/cciss/c1d10
>>          devid   18 size 279.39GB used 208.34GB path /dev/cciss/c1d9
>> Btrfs v0.19-16-g075587c-dirty
>>
>> The filesystem, mounted in /mnt/btrfs is hanging, no existing or new
>> process can access it, however 'df' still displays the disk usage (3TB out
>> of 5). The disks appear to be physically healthy. Please note that a
>> significant number of files were placed on this filesystem, between 20 and
>> 30 million files.
>>
>> The relevant kernel messages are displayed below:
>>
>> INFO: task btrfs-submit-0:4220 blocked for more than 120 seconds.
>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>> btrfs-submit- D 00010042e12f     0  4220      2 0x
>>   8803e584ac70 0046 4000 00011680
>>   8803f7349fd8 8803f7349fd8 8803e584ac70 00011680
>>   0001 8803ff99d250 8149f020 81150ab0
>> Call Trace:
>>   [] ? io_schedule+0x71/0xb1
>>   [] ? get_request_wait+0xab/0x140
>>   [] ? autoremove_wake_function+0x0/0x2e
>>   [] ? elv_rq_merge_ok+0x89/0x97
>>   [] ? blk_recount_segments+0x17/0x27
>>   [] ? __make_request+0x2d6/0x3fc
>>   [] ? generic_make_request+0x207/0x268
>>   [] ? submit_bio+0x9b/0xa2
>>   [] ? btrfs_requeue_work+0xd7/0xe1 [btrfs]
>>   [] ? run_scheduled_bios+0x297/0x48f [btrfs]
>>   [] ? worker_loop+0x17c/0x452 [btrfs]
>>   [] ? worker_loop+0x0/0x452 [btrfs]
>>   [] ? kthread+0x79/0x81
>>   [] ? kernel_thread_helper+0x4/0x10
>>   [] ? kthread+0x0/0x81
>>   [] ? kernel_thread_helper+0x0/0x10
> This looks like the issue we saw too, http://lkml.org/lkml/2010/6/8/375.
> This is reproduceable in our setup.

I think I know the cause of http://lkml.org/lkml/2010/6/8/375.
The code in the first do-while loop in btrfs_commit_transaction
sets the current process to TASK_UNINTERRUPTIBLE state, then calls
btrfs_start_delalloc_inodes, btrfs_wait_ordered_extents and
btrfs_run_ordered_operations(). All of these functions may call
cond_resched().
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


bcp command not installed by Makefile

2010-06-10 Thread Palmer Cox
Is there a reason the bcp command isn't installed when running make
install?  If not, here is a really simple patch to add it to the
install.

-Palmer Cox


diff --git a/Makefile b/Makefile
index 525676e..e5005b0 100644
--- a/Makefile
+++ b/Makefile
@@ -93,6 +93,7 @@ clean :
 install: $(progs) install-man
        $(INSTALL) -m755 -d $(DESTDIR)$(bindir)
        $(INSTALL) $(progs) $(DESTDIR)$(bindir)
+       $(INSTALL) -m755 bcp $(DESTDIR)$(bindir)
        if [ -e btrfs-convert ]; then $(INSTALL) btrfs-convert
$(DESTDIR)$(bindir); fi

 -include .*.d
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs: hanging processes - race condition?

2010-06-10 Thread Shaohua Li
On Fri, Jun 11, 2010 at 01:41:41AM +0800, Jerome Ibanes wrote:
> List,
> 
> I ran into a hang issue (race condition: cpu is high when the server is
> idle, meaning that btrfs is hanging, and IOwait is high as well) running
> 2.6.34 on debian/lenny on a x86_64 server (dual Opteron 275 w/ 16GB ram).
> The btrfs filesystem live on 18x300GB scsi spindles, configured as Raid-0,
> as shown below:
> 
> Label: none  uuid: bc6442c6-2fe2-4236-a5aa-6b7841234c52
>  Total devices 18 FS bytes used 2.94TB
>  devid5 size 279.39GB used 208.33GB path /dev/cciss/c1d0
>  devid   17 size 279.39GB used 208.34GB path /dev/cciss/c1d8
>  devid   16 size 279.39GB used 209.33GB path /dev/cciss/c1d7
>  devid4 size 279.39GB used 208.33GB path /dev/cciss/c0d4
>  devid1 size 279.39GB used 233.72GB path /dev/cciss/c0d1
>  devid   13 size 279.39GB used 208.33GB path /dev/cciss/c1d4
>  devid8 size 279.39GB used 208.33GB path /dev/cciss/c1d11
>  devid   12 size 279.39GB used 208.33GB path /dev/cciss/c1d3
>  devid3 size 279.39GB used 208.33GB path /dev/cciss/c0d3
>  devid9 size 279.39GB used 208.33GB path /dev/cciss/c1d12
>  devid6 size 279.39GB used 208.33GB path /dev/cciss/c1d1
>  devid   11 size 279.39GB used 208.33GB path /dev/cciss/c1d2
>  devid   14 size 279.39GB used 208.33GB path /dev/cciss/c1d5
>  devid2 size 279.39GB used 233.70GB path /dev/cciss/c0d2
>  devid   15 size 279.39GB used 209.33GB path /dev/cciss/c1d6
>  devid   10 size 279.39GB used 208.33GB path /dev/cciss/c1d13
>  devid7 size 279.39GB used 208.33GB path /dev/cciss/c1d10
>  devid   18 size 279.39GB used 208.34GB path /dev/cciss/c1d9
> Btrfs v0.19-16-g075587c-dirty
> 
> The filesystem, mounted in /mnt/btrfs is hanging, no existing or new
> process can access it, however 'df' still displays the disk usage (3TB out
> of 5). The disks appear to be physically healthy. Please note that a
> significant number of files were placed on this filesystem, between 20 and
> 30 million files.
> 
> The relevant kernel messages are displayed below:
> 
> INFO: task btrfs-submit-0:4220 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> btrfs-submit- D 00010042e12f 0  4220  2 0x
>   8803e584ac70 0046 4000 00011680
>   8803f7349fd8 8803f7349fd8 8803e584ac70 00011680
>   0001 8803ff99d250 8149f020 81150ab0
> Call Trace:
>   [] ? io_schedule+0x71/0xb1
>   [] ? get_request_wait+0xab/0x140
>   [] ? autoremove_wake_function+0x0/0x2e
>   [] ? elv_rq_merge_ok+0x89/0x97
>   [] ? blk_recount_segments+0x17/0x27
>   [] ? __make_request+0x2d6/0x3fc
>   [] ? generic_make_request+0x207/0x268
>   [] ? submit_bio+0x9b/0xa2
>   [] ? btrfs_requeue_work+0xd7/0xe1 [btrfs]
>   [] ? run_scheduled_bios+0x297/0x48f [btrfs]
>   [] ? worker_loop+0x17c/0x452 [btrfs]
>   [] ? worker_loop+0x0/0x452 [btrfs]
>   [] ? kthread+0x79/0x81
>   [] ? kernel_thread_helper+0x4/0x10
>   [] ? kthread+0x0/0x81
>   [] ? kernel_thread_helper+0x0/0x10
This looks like the issue we saw too, http://lkml.org/lkml/2010/6/8/375.
This is reproducible in our setup.

Thanks,
Shaohua
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Copy-on-write hard-links

2010-06-10 Thread Gordan Bobic

On 06/10/2010 09:00 PM, Chris Mason wrote:

On Thu, Jun 10, 2010 at 06:11:40PM +0100, Gordan Bobic wrote:

Is there a feature in btrfs to manually/explicitly mark hard-links
to be copy-on-write? My understanding is that this is what happens
when a snapshot is mounted rw and files modified.

Consider this scenario:

I have a base template fs. I make two snapshots of it that are
identical. The files in the template and both snapshots are
hard-links and have the same inode number.

I change a file in one of the snapshots, and it gets copied on
write. I make the same change in the other snapshot, and that, too,
gets copied on write. I now have two identical files that are not
hard-links any more.

What happens if I remove one of those files and create a hard-link
to the file in the other snapshot?


I'm afraid you can't do this.  hard linking between subvolumes isn't
allowed.  But, what you can do is use the clone ioctl to make a new
inode that references all of the data extents of an existing file, which
would be a kind of COW hard link.

Checkout bcp from btrfs-progs or cp --reflink from the latest..well
wherever cp comes from.


Would the inodes on the clone file be the same for purposes of loading a 
dynamic library? Specifically, say the file I am cloning is a DLL. 
Normally, if a DLL is hard-linked, if two programs dynamically load it 
from two different hard-links, it'll still only use one bit of shared 
memory. Will this also hold true for the cloned files? My understanding 
is that it will not since it's not the same inode. Is that the case?


Thanks.

Gordan
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Copy-on-write hard-links

2010-06-10 Thread Chris Mason
On Thu, Jun 10, 2010 at 06:11:40PM +0100, Gordan Bobic wrote:
> Is there a feature in btrfs to manually/explicitly mark hard-links
> to be copy-on-write? My understanding is that this is what happens
> when a snapshot is mounted rw and files modified.
> 
> Consider this scenario:
> 
> I have a base template fs. I make two snapshots of it that are
> identical. The files in the template and both snapshots are
> hard-links and have the same inode number.
> 
> I change a file in one of the snapshots, and it gets copied on
> write. I make the same change in the other snapshot, and that, too,
> gets copied on write. I now have two identical files that are not
> hard-links any more.
> 
> What happens if I remove one of those files and create a hard-link
> to the file in the other snapshot?

I'm afraid you can't do this.  hard linking between subvolumes isn't
allowed.  But, what you can do is use the clone ioctl to make a new
inode that references all of the data extents of an existing file, which
would be a kind of COW hard link.

Check out bcp from btrfs-progs or cp --reflink from the latest..well
wherever cp comes from.

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


btrfs: hanging processes - race condition?

2010-06-10 Thread Jerome Ibanes

List,

I ran into a hang issue (race condition: cpu is high when the server is 
idle, meaning that btrfs is hanging, and IOwait is high as well) running 
2.6.34 on debian/lenny on a x86_64 server (dual Opteron 275 w/ 16GB ram). 
The btrfs filesystem lives on 18x300GB scsi spindles, configured as Raid-0, 
as shown below:


Label: none  uuid: bc6442c6-2fe2-4236-a5aa-6b7841234c52
Total devices 18 FS bytes used 2.94TB
devid5 size 279.39GB used 208.33GB path /dev/cciss/c1d0
devid   17 size 279.39GB used 208.34GB path /dev/cciss/c1d8
devid   16 size 279.39GB used 209.33GB path /dev/cciss/c1d7
devid4 size 279.39GB used 208.33GB path /dev/cciss/c0d4
devid1 size 279.39GB used 233.72GB path /dev/cciss/c0d1
devid   13 size 279.39GB used 208.33GB path /dev/cciss/c1d4
devid8 size 279.39GB used 208.33GB path /dev/cciss/c1d11
devid   12 size 279.39GB used 208.33GB path /dev/cciss/c1d3
devid3 size 279.39GB used 208.33GB path /dev/cciss/c0d3
devid9 size 279.39GB used 208.33GB path /dev/cciss/c1d12
devid6 size 279.39GB used 208.33GB path /dev/cciss/c1d1
devid   11 size 279.39GB used 208.33GB path /dev/cciss/c1d2
devid   14 size 279.39GB used 208.33GB path /dev/cciss/c1d5
devid2 size 279.39GB used 233.70GB path /dev/cciss/c0d2
devid   15 size 279.39GB used 209.33GB path /dev/cciss/c1d6
devid   10 size 279.39GB used 208.33GB path /dev/cciss/c1d13
devid7 size 279.39GB used 208.33GB path /dev/cciss/c1d10
devid   18 size 279.39GB used 208.34GB path /dev/cciss/c1d9
Btrfs v0.19-16-g075587c-dirty

The filesystem, mounted in /mnt/btrfs is hanging, no existing or new 
process can access it, however 'df' still displays the disk usage (3TB out 
of 5). The disks appear to be physically healthy. Please note that a 
significant number of files were placed on this filesystem, between 20 and 
30 million files.


The relevant kernel messages are displayed below:

INFO: task btrfs-submit-0:4220 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
btrfs-submit- D 00010042e12f 0  4220  2 0x
 8803e584ac70 0046 4000 00011680
 8803f7349fd8 8803f7349fd8 8803e584ac70 00011680
 0001 8803ff99d250 8149f020 81150ab0
Call Trace:
 [] ? io_schedule+0x71/0xb1
 [] ? get_request_wait+0xab/0x140
 [] ? autoremove_wake_function+0x0/0x2e
 [] ? elv_rq_merge_ok+0x89/0x97
 [] ? blk_recount_segments+0x17/0x27
 [] ? __make_request+0x2d6/0x3fc
 [] ? generic_make_request+0x207/0x268
 [] ? submit_bio+0x9b/0xa2
 [] ? btrfs_requeue_work+0xd7/0xe1 [btrfs]
 [] ? run_scheduled_bios+0x297/0x48f [btrfs]
 [] ? worker_loop+0x17c/0x452 [btrfs]
 [] ? worker_loop+0x0/0x452 [btrfs]
 [] ? kthread+0x79/0x81
 [] ? kernel_thread_helper+0x4/0x10
 [] ? kthread+0x0/0x81
 [] ? kernel_thread_helper+0x0/0x10
INFO: task btrfs-transacti:4230 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
btrfs-transac D 00010042e1cc 0  4230  2 0x
 8803e544d300 0046 4000 00011680
 8803f3531fd8 8803f3531fd8 8803e544d300 00011680
 8803fe488240 04c1 8803ff8d7340 000381147502
Call Trace:
 [] ? sync_buffer+0x0/0x3f
 [] ? io_schedule+0x71/0xb1
 [] ? sync_buffer+0x3b/0x3f
 [] ? __wait_on_bit+0x41/0x70
 [] ? sync_buffer+0x0/0x3f
 [] ? out_of_line_wait_on_bit+0x6b/0x77
 [] ? wake_bit_function+0x0/0x23
 [] ? write_dev_supers+0xf3/0x225 [btrfs]
 [] ? write_all_supers+0x1d4/0x22c [btrfs]
 [] ? btrfs_commit_transaction+0x4fe/0x5e1 [btrfs]
 [] ? autoremove_wake_function+0x0/0x2e
 [] ? transaction_kthread+0x16b/0x1fd [btrfs]
 [] ? transaction_kthread+0x0/0x1fd [btrfs]
 [] ? kthread+0x79/0x81
 [] ? kernel_thread_helper+0x4/0x10
 [] ? kthread+0x0/0x81
 [] ? kernel_thread_helper+0x0/0x10
INFO: task tar:31615 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
tar   D 00010042dee1 0 31615   4269 0x
 8803ffa74d70 0082 4000 00011680
 88010046dfd8 88010046dfd8 8803ffa74d70 00011680
 880361cdd480 000161cdd480 8803ff8becf0 00025fff
Call Trace:
 [] ? sync_page+0x0/0x45
 [] ? io_schedule+0x71/0xb1
 [] ? sync_page+0x41/0x45
 [] ? __wait_on_bit+0x41/0x70
 [] ? wait_on_page_bit+0x6b/0x71
 [] ? wake_bit_function+0x0/0x23
 [] ? prepare_pages+0xe0/0x244 [btrfs]
 [] ? btrfs_check_data_free_space+0x69/0x206 [btrfs]
 [] ? btrfs_file_write+0x405/0x711 [btrfs]
 [] ? tty_write+0x213/0x22e
 [] ? vfs_write+0xad/0x149
 [] ? sys_write+0x45/0x6e
 [] ? system_call_fastpath+0x16/0x1b
INFO: task btrfs-submit-0:4220 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" 

Copy-on-write hard-links

2010-06-10 Thread Gordan Bobic
Is there a feature in btrfs to manually/explicitly mark hard-links to be 
copy-on-write? My understanding is that this is what happens when a 
snapshot is mounted rw and files modified.


Consider this scenario:

I have a base template fs. I make two snapshots of it that are 
identical. The files in the template and both snapshots are hard-links 
and have the same inode number.


I change a file in one of the snapshots, and it gets copied on write. I 
make the same change in the other snapshot, and that, too, gets copied 
on write. I now have two identical files that are not hard-links any more.


What happens if I remove one of those files and create a hard-link to 
the file in the other snapshot? Will this implicitly become a 
copy-on-write file or will the hard-link aspect in the traditional sense 
be preserved? If I modify the file, will it end up modified in both? Is 
there a way to explicitly set a COW flag (on a file with hard-links)?


The reason I am asking this is because I am looking into using either 
VServer or LXC virtualization. VServer has a "hashify" feature that 
works as I described (copy-on-write hard-linking identical files between 
multiple guests). But VServer isn't, and is unlikely to ever be in the 
mainline kernel. LXC is already in the mainline kernel, but relies on 
the FS to provide this functionality. For future proofing reasons, I 
would prefer to use LXC+btrfs, but hashify is too valuable a feature to 
sacrifice for staying with the mainline. Also note that simple 
block-level dedupe isn't sufficient for the full benefit in this context 
- hard-linking has the additional benefit that multiple copies of DLLs 
in multiple guests will not use separate memory when hard-linked (i.e. 
their inodes are the same). This equates to a very substantial memory 
saving (poor man's KSM) in addition to the disk space savings when there 
are many guests.


TIA.

Gordan
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] [12/23] BTRFS: Clean up unused variables -- bugs

2010-06-10 Thread Andi Kleen

These are all the cases where a variable is set but never read,
and which are really bugs.

- A couple of incorrect error handling paths fixed.
- One incorrect use of an allocation policy
- Some other things

Still needs more review.

Found by gcc 4.6's new warnings

Cc: chris.ma...@oracle.com
cc: linux-btrfs@vger.kernel.org


Signed-off-by: Andi Kleen 

---
 fs/btrfs/dir-item.c|2 +-
 fs/btrfs/extent-tree.c |3 +--
 fs/btrfs/extent_io.c   |2 ++
 fs/btrfs/inode.c   |6 +++---
 fs/btrfs/relocation.c  |4 +++-
 fs/btrfs/tree-log.c|2 +-
 6 files changed, 11 insertions(+), 8 deletions(-)

Index: linux-2.6.35-rc2-gcc/fs/btrfs/extent-tree.c
===
--- linux-2.6.35-rc2-gcc.orig/fs/btrfs/extent-tree.c
+++ linux-2.6.35-rc2-gcc/fs/btrfs/extent-tree.c
@@ -3337,8 +3337,7 @@ struct btrfs_block_rsv *btrfs_alloc_bloc
btrfs_init_block_rsv(block_rsv);
 
alloc_target = btrfs_get_alloc_profile(root, 0);
-   block_rsv->space_info = __find_space_info(fs_info,
- BTRFS_BLOCK_GROUP_METADATA);
+   block_rsv->space_info = __find_space_info(fs_info, alloc_target);
 
return block_rsv;
 }
Index: linux-2.6.35-rc2-gcc/fs/btrfs/dir-item.c
===
--- linux-2.6.35-rc2-gcc.orig/fs/btrfs/dir-item.c
+++ linux-2.6.35-rc2-gcc/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btr
ret = btrfs_truncate_item(trans, root, path,
  item_len - sub_item_len, 1);
}
-   return 0;
+   return ret;
 }
Index: linux-2.6.35-rc2-gcc/fs/btrfs/extent_io.c
===
--- linux-2.6.35-rc2-gcc.orig/fs/btrfs/extent_io.c
+++ linux-2.6.35-rc2-gcc/fs/btrfs/extent_io.c
@@ -2825,6 +2825,8 @@ int extent_prepare_write(struct extent_i
 NULL, 1,
 end_bio_extent_preparewrite, 0,
 0, 0);
+   if (ret && !err)
+   err = ret;
iocount++;
block_start = block_start + iosize;
} else {
Index: linux-2.6.35-rc2-gcc/fs/btrfs/inode.c
===
--- linux-2.6.35-rc2-gcc.orig/fs/btrfs/inode.c
+++ linux-2.6.35-rc2-gcc/fs/btrfs/inode.c
@@ -1372,7 +1372,7 @@ int btrfs_merge_bio_hook(struct page *pa
 
if (map_length < length + size)
return 1;
-   return 0;
+   return ret;
 }
 
 /*
@@ -2672,7 +2672,7 @@ static int check_path_shared(struct btrf
 {
struct extent_buffer *eb;
int level;
-   int ret;
+   int ret = 0;
u64 refs;
 
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
@@ -2686,7 +2686,7 @@ static int check_path_shared(struct btrf
if (refs > 1)
return 1;
}
-   return 0;
+   return ret; /* XXX callers? */
 }
 
 /*
Index: linux-2.6.35-rc2-gcc/fs/btrfs/tree-log.c
===
--- linux-2.6.35-rc2-gcc.orig/fs/btrfs/tree-log.c
+++ linux-2.6.35-rc2-gcc/fs/btrfs/tree-log.c
@@ -2273,7 +2273,7 @@ fail:
}
btrfs_end_log_trans(root);
 
-   return 0;
+   return err;
 }
 
 /* see comments for btrfs_del_dir_entries_in_log */
Index: linux-2.6.35-rc2-gcc/fs/btrfs/relocation.c
===
--- linux-2.6.35-rc2-gcc.orig/fs/btrfs/relocation.c
+++ linux-2.6.35-rc2-gcc/fs/btrfs/relocation.c
@@ -3098,6 +3098,8 @@ static int add_tree_block(struct reloc_c
BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
ret = get_ref_objectid_v0(rc, path, extent_key,
  &ref_owner, NULL);
+   if (ret < 0)
+   return ret;
BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
level = (int)ref_owner;
/* FIXME: get real generation */
@@ -4142,7 +4144,7 @@ int btrfs_reloc_clone_csums(struct inode
btrfs_add_ordered_sum(inode, ordered, sums);
}
btrfs_put_ordered_extent(ordered);
-   return 0;
+   return ret;
 }
 
 void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] [13/23] BTRFS: Clean up unused variables -- nonbugs

2010-06-10 Thread Andi Kleen

These are all the cases where a variable is set but never read,
and which, as far as I can see, are not bugs but simply
leftovers.

Still needs more review.

Found by gcc 4.6's new warnings

Cc: chris.ma...@oracle.com
Cc: linux-btrfs@vger.kernel.org

 fs/btrfs/compression.c|2 --
 fs/btrfs/ctree.c  |   20 ++--
 fs/btrfs/dir-item.c   |2 +-
 fs/btrfs/disk-io.c|   10 --
 fs/btrfs/extent-tree.c|2 --
 fs/btrfs/extent_io.c  |   10 ++
 fs/btrfs/inode.c  |   19 +++
 fs/btrfs/ioctl.c  |2 --
 fs/btrfs/ordered-data.c   |2 --
 fs/btrfs/relocation.c |2 ++
 fs/btrfs/root-tree.c  |2 --
 fs/btrfs/super.c  |1 +
 fs/btrfs/tree-defrag.c|2 --
 fs/btrfs/tree-log.c   |   15 +--
 fs/btrfs/volumes.c|4 
 fs/btrfs/xattr.c  |2 --
 fs/btrfs/zlib.c   |5 -

Signed-off-by: Andi Kleen 

---
 fs/btrfs/compression.c  |2 --
 fs/btrfs/ctree.c|   20 ++--
 fs/btrfs/disk-io.c  |   11 ---
 fs/btrfs/extent-tree.c  |2 --
 fs/btrfs/extent_io.c|9 -
 fs/btrfs/inode.c|   14 --
 fs/btrfs/ioctl.c|2 --
 fs/btrfs/ordered-data.c |2 --
 fs/btrfs/root-tree.c|2 --
 fs/btrfs/super.c|6 ++
 fs/btrfs/tree-defrag.c  |2 --
 fs/btrfs/tree-log.c |   15 ---
 fs/btrfs/volumes.c  |4 
 fs/btrfs/xattr.c|2 --
 fs/btrfs/zlib.c |5 -
 15 files changed, 4 insertions(+), 94 deletions(-)

Index: linux-2.6.35-rc2-gcc/fs/btrfs/ctree.c
===
--- linux-2.6.35-rc2-gcc.orig/fs/btrfs/ctree.c
+++ linux-2.6.35-rc2-gcc/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_h
  struct extent_buffer **cow_ret, u64 new_root_objectid)
 {
struct extent_buffer *cow;
-   u32 nritems;
int ret = 0;
int level;
struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_h
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
level = btrfs_header_level(buf);
-   nritems = btrfs_header_nritems(buf);
if (level == 0)
btrfs_item_key(buf, &disk_key, 0);
else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct
int wret;
int pslot;
int orig_slot = path->slots[level];
-   int err_on_enospc = 0;
u64 orig_ptr;
 
if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
return 0;
 
-   if (btrfs_header_nritems(mid) < 2)
-   err_on_enospc = 1;
+   btrfs_header_nritems(mid);
 
left = read_node_slot(root, parent, pslot - 1);
if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct
wret = push_node_left(trans, root, left, mid, 1);
if (wret < 0)
ret = wret;
-   if (btrfs_header_nritems(mid) < 2)
-   err_on_enospc = 1;
+   btrfs_header_nritems(mid);
}
 
/*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_inser
int wret;
int pslot;
int orig_slot = path->slots[level];
-   u64 orig_ptr;
 
if (level == 0)
return 1;
 
mid = path->nodes[level];
WARN_ON(btrfs_header_generation(mid) != trans->transid);
-   orig_ptr = btrfs_node_blockptr(mid, orig_slot);
 
if (level < BTRFS_MAX_LEVEL - 1)
parent = path->nodes[level + 1];
@@ -2534,7 +2527,6 @@ static noinline int __push_leaf_left(str
 {
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
-   int slot;
int i;
int push_space = 0;
int push_items = 0;
@@ -2546,8 +2538,6 @@ static noinline int __push_leaf_left(str
u32 this_item_size;
u32 old_left_item_size;
 
-   slot = path->slots[1];
-
if (empty)
nr = right_nritems;
else
@@ -3239,7 +3229,6 @@ int btrfs_truncate_item(struct btrfs_tra
 {
int ret = 0;
int slot;
-   int slot_orig;
struct extent_buffer *leaf;
struct btrfs_item *item;
u32 nritems;
@@ -3249,7 +3238,6 @@ int btrfs_truncate_item(struct btrfs_tra
unsigned int size_diff;
int i;
 
-   slot_orig = path->slots[0];
leaf = path->nodes[0];
slot = path->slots[0];
 
@@ -3354,7 +3342,6 @@ int btrfs_extend_item(struct btrfs_trans
 {
int ret = 0;
int slot;
-   int slot_orig;
struct extent_buffer *leaf;