Hi.
>> Do you have any idea why this is happening?
>
>Ok. I guess that you have got a contiguous chunk of memory which is more
>than 4G and size field overflow might be taking place in /proc/vmcore code.
>Can you please apply the attached patch and try.
I applied your patch to 2.6.16-mm2 and tested kdump again. As a result,
the kernel froze while executing the cp command.

An infinite loop occurred in read_vmcore(), because nr_bytes in
read_vmcore() was defined as size_t (unsigned int).
If nr_bytes in read_vmcore() is defined as u64, as in the attached patch,
the cp command completes successfully and creates a dump file of
the right size.

>Please also post the output of /proc/iomem.
OK. The output of /proc/iomem is included in the operation log below.

We should confirm that there is no problem with other values defined
as (unsigned) int; that is, the code should be checked to make sure that
there aren't similar problems of storing 64-bit values in int or unsigned
int variables.


The test environment is as follows.
---------------------------------------------------------
arch           : i386 smp
first-kernel   : 2.6.16-mm2 + Vivek's patch(2006/03/31)
second-kernel-1: 2.6.16-mm2 + Vivek's patch(2006/03/31)
second-kernel-2: 2.6.16-mm2 + Vivek's patch(2006/03/31) + the attached patch
memory         : 8GB
kexec-tools    : 1.01 + kdump7.patch
                      + Vivek's patch(2006/03/30 "kexec -l bzImage" fails)
---------------------------------------------------------


The operation log is as follows.
---------------------------------------------------------
# uname -r
2.6.16-mm2-fs-fix.smp
# cat /proc/iomem
00000000-0009b3ff : System RAM
0009b400-0009ffff : reserved
000a0000-000bffff : Video RAM area
000c0000-000cafff : Video ROM
000cb000-000cbfff : Adapter ROM
000cc000-000ccfff : Adapter ROM
000cd000-000d6dff : Adapter ROM
000d7000-000d83ff : Adapter ROM
000f0000-000fffff : System ROM
00100000-cff6ffff : System RAM
  00100000-003628bb : Kernel code
  003628bc-004c6c3f : Kernel data
  01000000-04ffffff : Crash kernel
cff70000-cff7afff : ACPI Tables
cff7b000-cff7ffff : ACPI Non-volatile Storage
cff80000-cfffffff : reserved
d0000000-d0000fff : 0000:00:01.0
d0001000-d000100f : 0000:00:1d.4
d0001400-d00017ff : 0000:00:1d.7
  d0001400-d00017ff : ehci_hcd
d0100000-d01fffff : PCI Bus #01
  d0100000-d01fffff : PCI Bus #02
    d0100000-d0101fff : 0000:02:03.0
      d0100000-d0100fff : aic79xx
    d0102000-d0103fff : 0000:02:03.1
      d0102000-d0102fff : aic79xx
    d0180000-d01fffff : 0000:02:03.1
d0200000-d02fffff : PCI Bus #09
  d0200000-d020ffff : 0000:09:01.0
  d0220000-d023ffff : 0000:09:02.0
    d0220000-d023ffff : e1000
  d0240000-d025ffff : 0000:09:02.0
    d0240000-d025ffff : e1000
  d0260000-d027ffff : 0000:09:03.0
    d0260000-d027ffff : e1000
  d0280000-d029ffff : 0000:09:03.0
    d0280000-d029ffff : e1000
  d02a0000-d02bffff : 0000:09:01.0
  d02c0000-d02dffff : 0000:09:02.0
  d02e0000-d02fffff : 0000:09:03.0
d0300000-d03fffff : PCI Bus #0a
  d0300000-d031ffff : 0000:0a:01.0
    d0300000-d031ffff : e1000
  d0320000-d033ffff : 0000:0a:01.0
    d0320000-d033ffff : e1000
d0600000-d06fffff : PCI Bus #01
  d0600000-d06fffff : PCI Bus #02
    d0600000-d060ffff : 0000:02:01.0
      d0600000-d060ffff : MegaRAID: LSI Logic Corporation
    d0610000-d0617fff : 0000:02:01.0
    d0680000-d06fffff : 0000:02:03.0
d1000000-d10fffff : PCI Bus #0a
  d1000000-d101ffff : 0000:0a:01.0
d1100000-d11003ff : 0000:00:1f.1
d8000000-dfffffff : PCI Bus #09
  d8000000-dfffffff : 0000:09:01.0
e0000000-efffffff : reserved
fec00000-fec0ffff : reserved
fee00000-fee00fff : reserved
ff800000-ffbfffff : reserved
fffffc00-ffffffff : reserved
100000000-22fffffff : System RAM
#
# free
             total       used       free     shared    buffers     cached
Mem:       8227048     132036    8095012          0      13476      57244
-/+ buffers/cache:      61316    8165732
Swap:      2096472          0    2096472
# kexec -p /boot/vmlinux-2.6.16-mm2-fs-fix.capture --args-linux \
> --elf64-core-headers --initrd=/boot/initrd-2.6.16-mm2-fs-fix.capture.img \
> --append="root=LABEL=/123 init 3 irqpoll"
# echo c > /proc/sysrq-trigger

(capture-kernel-1 is booting.)

# uname -r
2.6.16-mm2-fs-fix.capture
# ll /proc/vmcore
-r--------  1 root root 8521843744 Apr  3 13:46 /proc/vmcore
# cp /proc/vmcore dumpfile
BUG: soft lockup detected on CPU#0!
 <c1003d4d> show_trace+0xd/0x10   <c1003e67> dump_stack+0x17/0x20
 <c1047ccd> softlockup_tick+0x8d/0xb0   <c1029af2> run_local_timer+0x12/0x20
 <c10298d6> update_process_times+0x66/0xa0   <c10063f1> timer_interrupt+0x41/0x0
 <c1047e95> handle_IRQ_event+0x35/0x70   <c1947f47> __do_IRQ+0x77/0x30
 <c10050fa> do_IRQ+0x5a/0xa0
 =======================
 <c1003962> common_interrupt+0x1a/0x20   <c10a92f4> read_vmcore+0x114/0x220
 <c106aa08> vfs_read+0x180/0x190   <c106ad6d> sys_read+0x3d/0x70
 <c1002f2f> sysenter_past_esp+0x54/0x75
 
(Pushed the reset button; first-kernel is booting.)

# uname -r
2.6.16-mm2-fs-fix.smp
# ll dumpfile
-r--------  1 root root 3944099840 Apr  3 13:49 dumpfile
#
# kexec -p /boot/vmlinux-2.6.16-mm2-fs-fix.capture2 --args-linux \
> --elf64-core-headers --initrd=/boot/initrd-2.6.16-mm2-fs-fix.capture2.img \
> --append="root=LABEL=/123 init 3 irqpoll"
# echo c > /proc/sysrq-trigger

(capture-kernel-2 is booting.)

# uname -r
2.6.16-mm2-fs-fix.capture2
# cat /proc/iomem
00000000-0009ffff : System RAM
  00000000-00000000 : Crash kernel
000a0000-000bffff : Video RAM area
000c0000-000cafff : Video ROM
000cb000-000cbfff : Adapter ROM
000cc000-000ccfff : Adapter ROM
000cd000-000d6dff : Adapter ROM
000d7000-000d83ff : Adapter ROM
000f0000-000fffff : System ROM
01000000-01466fff : System RAM
  01000000-012589db : Kernel code
  012589dc-013b873f : Kernel data
01507400-04ffffff : System RAM
d0000000-d0000fff : 0000:00:01.0
d0001000-d000100f : 0000:00:1d.4
d0001400-d00017ff : 0000:00:1d.7
  d0001400-d00017ff : ehci_hcd
d0100000-d01fffff : PCI Bus #01
  d0100000-d01fffff : PCI Bus #02
    d0100000-d0101fff : 0000:02:03.0
      d0100000-d0100fff : aic79xx
    d0102000-d0103fff : 0000:02:03.1
      d0102000-d0102fff : aic79xx
    d0180000-d01fffff : 0000:02:03.1
d0200000-d02fffff : PCI Bus #09
  d0200000-d020ffff : 0000:09:01.0
  d0220000-d023ffff : 0000:09:02.0
    d0220000-d023ffff : e1000
  d0240000-d025ffff : 0000:09:02.0
    d0240000-d025ffff : e1000
  d0260000-d027ffff : 0000:09:03.0
    d0260000-d027ffff : e1000
  d0280000-d029ffff : 0000:09:03.0
    d0280000-d029ffff : e1000
  d02a0000-d02bffff : 0000:09:01.0
  d02c0000-d02dffff : 0000:09:02.0
  d02e0000-d02fffff : 0000:09:03.0
d0300000-d03fffff : PCI Bus #0a
  d0300000-d031ffff : 0000:0a:01.0
    d0300000-d031ffff : e1000
  d0320000-d033ffff : 0000:0a:01.0
    d0320000-d033ffff : e1000
d0600000-d06fffff : PCI Bus #01
  d0600000-d06fffff : PCI Bus #02
    d0600000-d060ffff : 0000:02:01.0
      d0600000-d060ffff : MegaRAID: LSI Logic Corporation
    d0610000-d0617fff : 0000:02:01.0
    d0680000-d06fffff : 0000:02:03.0
d1000000-d10fffff : PCI Bus #0a
  d1000000-d101ffff : 0000:0a:01.0
d1100000-d11003ff : 0000:00:1f.1
d8000000-dfffffff : PCI Bus #09
  d8000000-dfffffff : 0000:09:01.0
#
# cp /proc/vmcore dumpfile
# ll /proc/vmcore
-r--------  1 root root 8521843744 Apr  3 16:10 /proc/vmcore
# ll
total 8330256
-r--------  1 root root 8521843744 Apr  3 16:16 dumpfile
#
---------------------------------------------------------

Thanks
Ken'ichi Ohmichi

Attachment: kdump-vmcore-size-bug-contigmem4GB-fix.patch
Description: Binary data

_______________________________________________
fastboot mailing list
[email protected]
https://lists.osdl.org/mailman/listinfo/fastboot

Reply via email to