Hi.
>> Do you have any idea why this is happening?
>
>Ok. I guess that you have got a contiguous chunk of memory which is more
>than 4G and size field overflow might be taking place in /proc/vmcore code.
>Can you please apply the attached patch and try.
I applied your patch to 2.6.16-mm2, and tested kdump again.
As a result, the kernel froze while executing the cp command.
An infinite loop occurred in read_vmcore() because the variable nr_bytes in
read_vmcore() was declared as size_t (unsigned int on i386).
If nr_bytes in read_vmcore() is declared as u64, as in the attached patch,
the cp command completes successfully and creates a dump file of
the correct size.
>Please also post the output of /proc/iomem.
OK. The output of /proc/iomem is included in the operation log below.
We should also confirm that there are no problems with other values defined
as (unsigned) int; that is, the code should be checked to make sure that there
aren't similar problems of storing 64-bit values in int or unsigned int
variables.
The test environment is as follows.
---------------------------------------------------------
arch : i386 smp
first-kernel : 2.6.16-mm2 + Vivek's patch(2006/03/31)
second-kernel-1: 2.6.16-mm2 + Vivek's patch(2006/03/31)
second-kernel-2: 2.6.16-mm2 + Vivek's patch(2006/03/31) + the attached patch
memory : 8GB
kexec-tools : 1.01 + kdump7.patch
+ Vivek's patch(2006/03/30 "kexec -l bzImage" fails)
---------------------------------------------------------
The operation log is as follows.
---------------------------------------------------------
# uname -r
2.6.16-mm2-fs-fix.smp
# cat /proc/iomem
00000000-0009b3ff : System RAM
0009b400-0009ffff : reserved
000a0000-000bffff : Video RAM area
000c0000-000cafff : Video ROM
000cb000-000cbfff : Adapter ROM
000cc000-000ccfff : Adapter ROM
000cd000-000d6dff : Adapter ROM
000d7000-000d83ff : Adapter ROM
000f0000-000fffff : System ROM
00100000-cff6ffff : System RAM
00100000-003628bb : Kernel code
003628bc-004c6c3f : Kernel data
01000000-04ffffff : Crash kernel
cff70000-cff7afff : ACPI Tables
cff7b000-cff7ffff : ACPI Non-volatile Storage
cff80000-cfffffff : reserved
d0000000-d0000fff : 0000:00:01.0
d0001000-d000100f : 0000:00:1d.4
d0001400-d00017ff : 0000:00:1d.7
d0001400-d00017ff : ehci_hcd
d0100000-d01fffff : PCI Bus #01
d0100000-d01fffff : PCI Bus #02
d0100000-d0101fff : 0000:02:03.0
d0100000-d0100fff : aic79xx
d0102000-d0103fff : 0000:02:03.1
d0102000-d0102fff : aic79xx
d0180000-d01fffff : 0000:02:03.1
d0200000-d02fffff : PCI Bus #09
d0200000-d020ffff : 0000:09:01.0
d0220000-d023ffff : 0000:09:02.0
d0220000-d023ffff : e1000
d0240000-d025ffff : 0000:09:02.0
d0240000-d025ffff : e1000
d0260000-d027ffff : 0000:09:03.0
d0260000-d027ffff : e1000
d0280000-d029ffff : 0000:09:03.0
d0280000-d029ffff : e1000
d02a0000-d02bffff : 0000:09:01.0
d02c0000-d02dffff : 0000:09:02.0
d02e0000-d02fffff : 0000:09:03.0
d0300000-d03fffff : PCI Bus #0a
d0300000-d031ffff : 0000:0a:01.0
d0300000-d031ffff : e1000
d0320000-d033ffff : 0000:0a:01.0
d0320000-d033ffff : e1000
d0600000-d06fffff : PCI Bus #01
d0600000-d06fffff : PCI Bus #02
d0600000-d060ffff : 0000:02:01.0
d0600000-d060ffff : MegaRAID: LSI Logic Corporation
d0610000-d0617fff : 0000:02:01.0
d0680000-d06fffff : 0000:02:03.0
d1000000-d10fffff : PCI Bus #0a
d1000000-d101ffff : 0000:0a:01.0
d1100000-d11003ff : 0000:00:1f.1
d8000000-dfffffff : PCI Bus #09
d8000000-dfffffff : 0000:09:01.0
e0000000-efffffff : reserved
fec00000-fec0ffff : reserved
fee00000-fee00fff : reserved
ff800000-ffbfffff : reserved
fffffc00-ffffffff : reserved
100000000-22fffffff : System RAM
#
# free
total used free shared buffers cached
Mem: 8227048 132036 8095012 0 13476 57244
-/+ buffers/cache: 61316 8165732
Swap: 2096472 0 2096472
# kexec -p /boot/vmlinux-2.6.16-mm2-fs-fix.capture --args-linux \
> --elf64-core-headers --initrd=/boot/initrd-2.6.16-mm2-fs-fix.capture.img \
> --append="root=LABEL=/123 init 3 irqpoll"
# echo c > /proc/sysrq-trigger
(capture-kernel-1 is booting.)
# uname -r
2.6.16-mm2-fs-fix.capture
# ll /proc/vmcore
-r-------- 1 root root 8521843744 Apr 3 13:46 /proc/vmcore
# cp /proc/vmcore dumpfile
BUG: soft lockup detected on CPU#0!
<c1003d4d> show_trace+0xd/0x10 <c1003e67> dump_stack+0x17/0x20
<c1047ccd> softlockup_tick+0x8d/0xb0 <c1029af2> run_local_timer+0x12/0x20
<c10298d6> update_process_times+0x66/0xa0 <c10063f1> timer_interrupt+0x41/0x0
<c1047e95> handle_IRQ_event+0x35/0x70 <c1947f47> __do_IRQ+0x77/0x30
<c10050fa> do_IRQ+0x5a/0xa0
=======================
<c1003962> common_interrupt+0x1a/0x20 <c10a92f4> read_vmcore+0x114/0x220
<c106aa08> vfs_read+0x180/0x190 <c106ad6d> sys_read+0x3d/0x70
<c1002f2f> sysenter_past_esp+0x54/0x75
(push the reset, and first-kernel is booting.)
# uname -r
2.6.16-mm2-fs-fix.smp
# ll dumpfile
-r-------- 1 root root 3944099840 Apr 3 13:49 dumpfile
#
# kexec -p /boot/vmlinux-2.6.16-mm2-fs-fix.capture2 --args-linux \
> --elf64-core-headers --initrd=/boot/initrd-2.6.16-mm2-fs-fix.capture2.img \
> --append="root=LABEL=/123 init 3 irqpoll"
# echo c > /proc/sysrq-trigger
(capture-kernel-2 is booting.)
# uname -r
2.6.16-mm2-fs-fix.capture2
# cat /proc/iomem
00000000-0009ffff : System RAM
00000000-00000000 : Crash kernel
000a0000-000bffff : Video RAM area
000c0000-000cafff : Video ROM
000cb000-000cbfff : Adapter ROM
000cc000-000ccfff : Adapter ROM
000cd000-000d6dff : Adapter ROM
000d7000-000d83ff : Adapter ROM
000f0000-000fffff : System ROM
01000000-01466fff : System RAM
01000000-012589db : Kernel code
012589dc-013b873f : Kernel data
01507400-04ffffff : System RAM
d0000000-d0000fff : 0000:00:01.0
d0001000-d000100f : 0000:00:1d.4
d0001400-d00017ff : 0000:00:1d.7
d0001400-d00017ff : ehci_hcd
d0100000-d01fffff : PCI Bus #01
d0100000-d01fffff : PCI Bus #02
d0100000-d0101fff : 0000:02:03.0
d0100000-d0100fff : aic79xx
d0102000-d0103fff : 0000:02:03.1
d0102000-d0102fff : aic79xx
d0180000-d01fffff : 0000:02:03.1
d0200000-d02fffff : PCI Bus #09
d0200000-d020ffff : 0000:09:01.0
d0220000-d023ffff : 0000:09:02.0
d0220000-d023ffff : e1000
d0240000-d025ffff : 0000:09:02.0
d0240000-d025ffff : e1000
d0260000-d027ffff : 0000:09:03.0
d0260000-d027ffff : e1000
d0280000-d029ffff : 0000:09:03.0
d0280000-d029ffff : e1000
d02a0000-d02bffff : 0000:09:01.0
d02c0000-d02dffff : 0000:09:02.0
d02e0000-d02fffff : 0000:09:03.0
d0300000-d03fffff : PCI Bus #0a
d0300000-d031ffff : 0000:0a:01.0
d0300000-d031ffff : e1000
d0320000-d033ffff : 0000:0a:01.0
d0320000-d033ffff : e1000
d0600000-d06fffff : PCI Bus #01
d0600000-d06fffff : PCI Bus #02
d0600000-d060ffff : 0000:02:01.0
d0600000-d060ffff : MegaRAID: LSI Logic Corporation
d0610000-d0617fff : 0000:02:01.0
d0680000-d06fffff : 0000:02:03.0
d1000000-d10fffff : PCI Bus #0a
d1000000-d101ffff : 0000:0a:01.0
d1100000-d11003ff : 0000:00:1f.1
d8000000-dfffffff : PCI Bus #09
d8000000-dfffffff : 0000:09:01.0
#
# cp /proc/vmcore dumpfile
# ll /proc/vmcore
-r-------- 1 root root 8521843744 Apr 3 16:10 /proc/vmcore
# ll
total 8330256
-r-------- 1 root root 8521843744 Apr 3 16:16 dumpfile
#
---------------------------------------------------------
Thanks
Ken'ichi Ohmichi
kdump-vmcore-size-bug-contigmem4GB-fix.patch
Description: Binary data
_______________________________________________ fastboot mailing list [email protected] https://lists.osdl.org/mailman/listinfo/fastboot
