Re: [OTP] SMP board recommendations?
On Thu, Feb 15, 2001 at 04:38:37PM -0800, David D.W. Downey wrote: > I've tried the Abit VP6 and the MSI 6321 (694D Pro). Both give me the APIC > errors with system lockups on heavy I/O using the 2.4.1-ac1# and the > 2.4.2-pre# kernels. (The ac-## line doesn't die ANYWHERE near as often as > the other board.) the APIC code has been modified quite a bit and Maciej's fixes so far, on this part shows that my BP6 stays alive while even the -AC kernels were killed. I'd suggest you to try his patches and see if that works for you. IIRC, this is the one : patch-2.4.1-io_apic-46 diff -up --recursive --new-file linux-2.4.1.macro/arch/i386/kernel/apic.c linux-2.4.1/arch/i386/kernel/apic.c --- linux-2.4.1.macro/arch/i386/kernel/apic.c Wed Dec 13 23:54:27 2000 +++ linux-2.4.1/arch/i386/kernel/apic.c Mon Feb 12 16:11:15 2001 @@ -23,6 +23,7 @@ #include <linux/mc146818rtc.h> #include <linux/kernel_stat.h> +#include <asm/atomic.h> #include <asm/smp.h> #include <asm/mtrr.h> #include <asm/mpspec.h> @@ -270,7 +271,13 @@ void __init setup_local_APIC (void) * PCI Ne2000 networking cards and PII/PIII processors, dual * BX chipset. ] */ -#if 0 + /* +* Actually disabling the focus CPU check just makes the hang less +* frequent as it makes the interrupt distributon model be more +* like LRU than MRU (the short-term load is more even across CPUs). +* See also the comment in end_level_ioapic_irq(). 
--macro +*/ +#if 1 /* Enable focus processor (bit==0) */ value &= ~(1<<9); #else @@ -764,7 +771,7 @@ asmlinkage void smp_error_interrupt(void apic_write(APIC_ESR, 0); v1 = apic_read(APIC_ESR); ack_APIC_irq(); - irq_err_count++; + atomic_inc(&irq_err_count); /* Here is what the APIC error bits mean: 0: Send CS error diff -up --recursive --new-file linux-2.4.1.macro/arch/i386/kernel/i8259.c linux-2.4.1/arch/i386/kernel/i8259.c --- linux-2.4.1.macro/arch/i386/kernel/i8259.c Mon Nov 20 18:01:58 2000 +++ linux-2.4.1/arch/i386/kernel/i8259.c Sun Feb 11 19:54:33 2001 @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/kernel_stat.h> +#include <asm/atomic.h> #include <asm/system.h> #include <asm/io.h> #include <asm/irq.h> @@ -321,7 +322,7 @@ spurious_8259A_irq: printk("spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } - irq_err_count++; + atomic_inc(&irq_err_count); /* * Theoretically we do not have to handle this IRQ, * but in Linux this does not cause problems and is diff -up --recursive --new-file linux-2.4.1.macro/arch/i386/kernel/io_apic.c linux-2.4.1/arch/i386/kernel/io_apic.c --- linux-2.4.1.macro/arch/i386/kernel/io_apic.c Sat Feb 3 12:05:49 2001 +++ linux-2.4.1/arch/i386/kernel/io_apic.c Tue Feb 13 19:59:55 2001 @@ -33,6 +33,8 @@ #include <asm/smp.h> #include <asm/desc.h> +#define APIC_LOCKUP_DEBUG + static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED; /* @@ -122,8 +124,14 @@ static void add_pin_to_irq(unsigned int static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask,0, |= 0x0001, io_apic_sync(entry->apic))/* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffe, ) /* mask = 0 */ +DO_ACTION( __mask, 0, |= 0x0001, io_apic_sync(entry->apic) ) + /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffe, ) + /* mask = 0 */ +DO_ACTION( __mask_and_edge,0, = (reg & 0x7fff) | 0x0001, ) + /* mask = 1, trigger = 0 */ +DO_ACTION( __unmask_and_level, 0, = (reg & 0xfffe) | 0x8000, ) + /* mask = 0, trigger = 1 */ static void mask_IO_APIC_irq (unsigned int irq) { @@ -847,6 +855,8 @@ void /*__init*/ print_local_APIC(void * v = 
apic_read(APIC_EOI); printk(KERN_DEBUG "... APIC EOI: %08x\n", v); + v = apic_read(APIC_RRR); + printk(KERN_DEBUG "... APIC RRR: %08x\n", v); v = apic_read(APIC_LDR); printk(KERN_DEBUG "... APIC LDR: %08x\n", v); v = apic_read(APIC_DFR); @@ -1191,12 +1201,61 @@ static unsigned int startup_level_ioapic #define enable_level_ioapic_irq unmask_IO_APIC_irq #define disable_level_ioapic_irq mask_IO_APIC_irq -static void end_level_ioapic_irq (unsigned int i) +static void end_level_ioapic_irq (unsigned int irq) { + unsigned long v; + +/* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts
Re: [OTP] SMP board recommendations?
On Thu, Feb 15, 2001 at 04:38:37PM -0800, David D.W. Downey wrote: > I've tried the Abit VP6 and the MSI 6321 (694D Pro). Both give me the APIC > errors with system lockups on heavy I/O using the 2.4.1-ac1# and the > 2.4.2-pre# kernels. (The ac-## line doesn't die ANYWHERE near as often as > the other board.) the APIC code has been modified quite a bit and Maciej's fixes so far, on this part shows that my BP6 stays alive while even the -AC kernels were killed. I'd suggest you to try his patches and see if that works for you. IIRC, this is the one : patch-2.4.1-io_apic-46 diff -up --recursive --new-file linux-2.4.1.macro/arch/i386/kernel/apic.c linux-2.4.1/arch/i386/kernel/apic.c --- linux-2.4.1.macro/arch/i386/kernel/apic.c Wed Dec 13 23:54:27 2000 +++ linux-2.4.1/arch/i386/kernel/apic.c Mon Feb 12 16:11:15 2001 @@ -23,6 +23,7 @@ #include <linux/mc146818rtc.h> #include <linux/kernel_stat.h> +#include <asm/atomic.h> #include <asm/smp.h> #include <asm/mtrr.h> #include <asm/mpspec.h> @@ -270,7 +271,13 @@ void __init setup_local_APIC (void) * PCI Ne2000 networking cards and PII/PIII processors, dual * BX chipset. ] */ -#if 0 + /* +* Actually disabling the focus CPU check just makes the hang less +* frequent as it makes the interrupt distributon model be more +* like LRU than MRU (the short-term load is more even across CPUs). +* See also the comment in end_level_ioapic_irq(). 
--macro +*/ +#if 1 /* Enable focus processor (bit==0) */ value &= ~(1<<9); #else @@ -764,7 +771,7 @@ asmlinkage void smp_error_interrupt(void apic_write(APIC_ESR, 0); v1 = apic_read(APIC_ESR); ack_APIC_irq(); - irq_err_count++; + atomic_inc(&irq_err_count); /* Here is what the APIC error bits mean: 0: Send CS error diff -up --recursive --new-file linux-2.4.1.macro/arch/i386/kernel/i8259.c linux-2.4.1/arch/i386/kernel/i8259.c --- linux-2.4.1.macro/arch/i386/kernel/i8259.c Mon Nov 20 18:01:58 2000 +++ linux-2.4.1/arch/i386/kernel/i8259.c Sun Feb 11 19:54:33 2001 @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/kernel_stat.h> +#include <asm/atomic.h> #include <asm/system.h> #include <asm/io.h> #include <asm/irq.h> @@ -321,7 +322,7 @@ spurious_8259A_irq: printk("spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } - irq_err_count++; + atomic_inc(&irq_err_count); /* * Theoretically we do not have to handle this IRQ, * but in Linux this does not cause problems and is diff -up --recursive --new-file linux-2.4.1.macro/arch/i386/kernel/io_apic.c linux-2.4.1/arch/i386/kernel/io_apic.c --- linux-2.4.1.macro/arch/i386/kernel/io_apic.c Sat Feb 3 12:05:49 2001 +++ linux-2.4.1/arch/i386/kernel/io_apic.c Tue Feb 13 19:59:55 2001 @@ -33,6 +33,8 @@ #include <asm/smp.h> #include <asm/desc.h> +#define APIC_LOCKUP_DEBUG + static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED; /* @@ -122,8 +124,14 @@ static void add_pin_to_irq(unsigned int static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask,0, |= 0x0001, io_apic_sync(entry->apic))/* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffe, ) /* mask = 0 */ +DO_ACTION( __mask, 0, |= 0x0001, io_apic_sync(entry->apic) ) + /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffe, ) + /* mask = 0 */ +DO_ACTION( __mask_and_edge,0, = (reg & 0x7fff) | 0x0001, ) + /* mask = 1, trigger = 0 */ +DO_ACTION( __unmask_and_level, 0, = (reg & 0xfffe) | 0x8000, ) + /* mask = 0, trigger = 1 */ static void mask_IO_APIC_irq 
(unsigned int irq) { @@ -847,6 +855,8 @@ void /*__init*/ print_local_APIC(void * v = apic_read(APIC_EOI); printk(KERN_DEBUG "... APIC EOI: %08x\n", v); + v = apic_read(APIC_RRR); + printk(KERN_DEBUG "... APIC RRR: %08x\n", v); v = apic_read(APIC_LDR); printk(KERN_DEBUG "... APIC LDR: %08x\n", v); v = apic_read(APIC_DFR); @@ -1191,12 +1201,61 @@ static unsigned int startup_level_ioapic #define enable_level_ioapic_irq unmask_IO_APIC_irq #define disable_level_ioapic_irq mask_IO_APIC_irq -static void end_level_ioapic_irq (unsigned int i) +static void end_level_ioapic_irq (unsigned int irq) { + unsigned long v; + +/* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as
Re: [OTP] SMP board recommendations?
Hi David, Just to let you and the rest of the world in on a secret, 'ASL, Inc.' is the premier ATA server system builder. Jeff Nguyen is the only person that I knew two years ago that was a pioneer and I have shared some information with him before in the past, but here is ATA and it is here to stay. Cheers, Andre Hedrick Linux ATA Development ASL Kernel Development - ASL, Inc. Toll free: 1-877-ASL-3535 1757 Houret Court Fax: 1-408-941-2071 Milpitas, CA 95035 Web: www.aslab.com *** shameless toys of creation to challenge the GB/$$ *** http://www.aslab.com/contents/servers/Sovereign-3400T.html http://www.aslab.com/contents/servers/Sovereign-3450T.html On Thu, 15 Feb 2001, David D.W. Downey wrote: > Thank you all for your response. > > Andre (ASL), thanks for the assist. Laurie and Janine took care of me. > Asus CUV4X-D mobo with 1GB of buffered ECC RAM. I'm in the process of > transferring all the hardware to the new board. I'll let you know if this > new board solves the APIC errors and the random lockups under heavy I/O > problems. > > I do have one more problem that I just can NOT track down. > > 2.4.1-ac10 kernel on the old Abit VP6 mobo. I'm getting curious errors > from the 2.4.1, 2.4.1-ac10, and 2.4.2-pre[#] kernels. > > I've been using > > dd if=/dev/zero of=/tmp/testdd.img bs=1024k count=1500 > > for testing of I/O on the various boards I have here. Now, the funny part > is that I get "file size limit exceeded" at around 1.0GB. I was getting > this under the 2.4.2-pre# kernels so I switched to straight 2.4.1 and got > the same problem. I switched to the 2.4.1-ac# line and the problem > disappeared. Guess what? It's baaacckk! 
> > So, I did a strace of the dd command and got the following from it > > execve("/bin/dd", ["dd", "if=/dev/zero", "of=/tmp/testing.img", "bs=1024k", >"count=1500"], [/* 22 vars */]) = 0 > brk(0) = 0x804e7b8 > open("/etc/ld.so.preload", O_RDONLY)= -1 ENOENT (No such file or directory) > open("/etc/ld.so.cache", O_RDONLY) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=7852, ...}) = 0 > old_mmap(NULL, 7852, PROT_READ, MAP_PRIVATE, 3, 0) = 0x40015000 > close(3)= 0 > open("/lib/libc.so.6", O_RDONLY)= 3 > fstat(3, {st_mode=S_IFREG|0755, st_size=1183326, ...}) = 0 > read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200\215"..., 4096) = 4096 > old_mmap(NULL, 947548, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x40017000 > mprotect(0x400f7000, 30044, PROT_NONE) = 0 > old_mmap(0x400f7000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0xdf000) >= 0x400f7000 > old_mmap(0x400fb000, 13660, PROT_READ|PROT_WRITE, >MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x400fb000 > close(3)= 0 > old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = >0x400ff000 > mprotect(0x40017000, 917504, PROT_READ|PROT_WRITE) = 0 > mprotect(0x40017000, 917504, PROT_READ|PROT_EXEC) = 0 > munmap(0x40015000, 7852)= 0 > personality(PER_LINUX) = 0 > getpid()= 195 > brk(0) = 0x804e7b8 > brk(0x804e7f0) = 0x804e7f0 > brk(0x804f000) = 0x804f000 > open("/dev/zero", O_RDONLY|O_LARGEFILE) = 3 > open("/tmp/testing.img", O_RDWR|O_CREAT|O_TRUNC|O_LARGEFILE, 0666) = 4 > rt_sigaction(SIGINT, NULL, {SIG_DFL}, 8) = 0 > rt_sigaction(SIGINT, {0x804ada8, [], 0x400}, NULL, 8) = 0 > rt_sigaction(SIGQUIT, NULL, {SIG_DFL}, 8) = 0 > rt_sigaction(SIGQUIT, {0x804ada8, [], 0x400}, NULL, 8) = 0 > rt_sigaction(SIGPIPE, NULL, {SIG_DFL}, 8) = 0 > rt_sigaction(SIGPIPE, {0x804ada8, [], 0x400}, NULL, 8) = 0 > rt_sigaction(SIGUSR1, NULL, {SIG_DFL}, 8) = 0 > rt_sigaction(SIGUSR1, {0x804ae70, [], 0x400}, NULL, 8) = 0 > old_mmap(NULL, 1052672, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
>0x4010 > read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = >1048576 > write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = >1048576 > > * BIG ASS SNIP ** > > read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = >1048576 > write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = -1 >EFBIG (File too large) > --- SIGXFSZ (File size limit exceeded) --- > +++ killed by SIGXFSZ +++ > > > > Now, notice the beginning file creation call. It starts out with > O_LARGEFILE but ends with EFBIG. Since I'm not totally familiar with the > kernel code I could be wrong on my next statement and if I am, please tell > me, but it looks like it changes the file creation call from LARGEFILE to > EFBIG (or is this just the error call
Re: [OTP] SMP board recommendations?
Thank you all for your response. Andre (ASL), thanks for the assist. Laurie and Janine took care of me. Asus CUV4X-D mobo with 1GB of buffered ECC RAM. I'm in the process of transfering all the hardware to the new board. I'll let you know if this new board solves the APIC errors and the random lockups under heavy I/O problems. I do have one more problem that I just can NOT track down. 2.4.1-ac10 kernel on the old Abit VP6 mobo. I'm getting curious errors from the 2.4.1, 2.4.1-ac10, and 2.4.2-pre[#] kernels. I've been using dd if=/dev/zero of=/tmp/testdd.img bs=1024k count=1500 for testing of I/O on the various boards I have here. Now, the funny part is that I get "file size limit exceeded" at around 1.0GB. I was getting this under the 2.4.2-pre# kernels so i switched to straight 2.4.1 and got the same problem. I switched to the 2.4.1-ac# line and the problem disappeared. Guess what? It's baaacckk! So, I did a strace of the dd command and got the following from it execve("/bin/dd", ["dd", "if=/dev/zero", "of=/tmp/testing.img", "bs=1024k", "count=1500"], [/* 22 vars */]) = 0 brk(0) = 0x804e7b8 open("/etc/ld.so.preload", O_RDONLY)= -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=7852, ...}) = 0 old_mmap(NULL, 7852, PROT_READ, MAP_PRIVATE, 3, 0) = 0x40015000 close(3)= 0 open("/lib/libc.so.6", O_RDONLY)= 3 fstat(3, {st_mode=S_IFREG|0755, st_size=1183326, ...}) = 0 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200\215"..., 4096) = 4096 old_mmap(NULL, 947548, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x40017000 mprotect(0x400f7000, 30044, PROT_NONE) = 0 old_mmap(0x400f7000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0xdf000) = 0x400f7000 old_mmap(0x400fb000, 13660, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x400fb000 close(3)= 0 old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x400ff000 mprotect(0x40017000, 917504, 
PROT_READ|PROT_WRITE) = 0 mprotect(0x40017000, 917504, PROT_READ|PROT_EXEC) = 0 munmap(0x40015000, 7852)= 0 personality(PER_LINUX) = 0 getpid()= 195 brk(0) = 0x804e7b8 brk(0x804e7f0) = 0x804e7f0 brk(0x804f000) = 0x804f000 open("/dev/zero", O_RDONLY|O_LARGEFILE) = 3 open("/tmp/testing.img", O_RDWR|O_CREAT|O_TRUNC|O_LARGEFILE, 0666) = 4 rt_sigaction(SIGINT, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGINT, {0x804ada8, [], 0x400}, NULL, 8) = 0 rt_sigaction(SIGQUIT, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGQUIT, {0x804ada8, [], 0x400}, NULL, 8) = 0 rt_sigaction(SIGPIPE, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGPIPE, {0x804ada8, [], 0x400}, NULL, 8) = 0 rt_sigaction(SIGUSR1, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGUSR1, {0x804ae70, [], 0x400}, NULL, 8) = 0 old_mmap(NULL, 1052672, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x4010 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576 write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576 * BIG ASS SNIP ** read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576 write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = -1 EFBIG (File too large) --- SIGXFSZ (File size limit exceeded) --- +++ killed by SIGXFSZ +++ Now, notice the beginning file creation call. It starts out with O_LARGEFILE but ends with EFBIG. Since I'm not totally familiar with the kernel code I could be wrong on my next statement and if I am, please tell me, but it looks like it changes the file creation call from LARGEFILE to EFBIG (or is this just the error call itself?) Now, the kernel is supposed to be able to handle creating a 4TB file(?), so 1.0GB should be nothing to it. NOTHING changed betwen it working and not working. No hardware changes, no software additions, no recompiles of existing applications/daemons.. nothing. So, my question is now one of "What gives?" Any clues on how I can check to see what's going wrong? 
Is my gut feeling that it's changing the file type wrong? (IIUC, there are different open() calls for different size files? No, I have nothing to base this on, just something I flashed on and thought might explain the problem.) I'm learning here guys, so please be gentle. You folks are the only ones I have with the experience to tell me when I'm just fscked in the head and when I'm bang on. -- David D.W. Downey - RHCE Consulting Engineer Ensim Corporation - Sunnyvale, CA - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at
Re: [OTP] SMP board recommendations?
Thank you all for your response. Andre (ASL), thanks for the assist. Laurie and Janine took care of me. Asus CUV4X-D mobo with 1GB of buffered ECC RAM. I'm in the process of transfering all the hardware to the new board. I'll let you know if this new board solves the APIC errors and the random lockups under heavy I/O problems. I do have one more problem that I just can NOT track down. 2.4.1-ac10 kernel on the old Abit VP6 mobo. I'm getting curious errors from the 2.4.1, 2.4.1-ac10, and 2.4.2-pre[#] kernels. I've been using dd if=/dev/zero of=/tmp/testdd.img bs=1024k count=1500 for testing of I/O on the various boards I have here. Now, the funny part is that I get "file size limit exceeded" at around 1.0GB. I was getting this under the 2.4.2-pre# kernels so i switched to straight 2.4.1 and got the same problem. I switched to the 2.4.1-ac# line and the problem disappeared. Guess what? It's baaacckk! So, I did a strace of the dd command and got the following from it execve("/bin/dd", ["dd", "if=/dev/zero", "of=/tmp/testing.img", "bs=1024k", "count=1500"], [/* 22 vars */]) = 0 brk(0) = 0x804e7b8 open("/etc/ld.so.preload", O_RDONLY)= -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=7852, ...}) = 0 old_mmap(NULL, 7852, PROT_READ, MAP_PRIVATE, 3, 0) = 0x40015000 close(3)= 0 open("/lib/libc.so.6", O_RDONLY)= 3 fstat(3, {st_mode=S_IFREG|0755, st_size=1183326, ...}) = 0 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200\215"..., 4096) = 4096 old_mmap(NULL, 947548, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x40017000 mprotect(0x400f7000, 30044, PROT_NONE) = 0 old_mmap(0x400f7000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0xdf000) = 0x400f7000 old_mmap(0x400fb000, 13660, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x400fb000 close(3)= 0 old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x400ff000 mprotect(0x40017000, 917504, 
PROT_READ|PROT_WRITE) = 0 mprotect(0x40017000, 917504, PROT_READ|PROT_EXEC) = 0 munmap(0x40015000, 7852)= 0 personality(PER_LINUX) = 0 getpid()= 195 brk(0) = 0x804e7b8 brk(0x804e7f0) = 0x804e7f0 brk(0x804f000) = 0x804f000 open("/dev/zero", O_RDONLY|O_LARGEFILE) = 3 open("/tmp/testing.img", O_RDWR|O_CREAT|O_TRUNC|O_LARGEFILE, 0666) = 4 rt_sigaction(SIGINT, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGINT, {0x804ada8, [], 0x400}, NULL, 8) = 0 rt_sigaction(SIGQUIT, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGQUIT, {0x804ada8, [], 0x400}, NULL, 8) = 0 rt_sigaction(SIGPIPE, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGPIPE, {0x804ada8, [], 0x400}, NULL, 8) = 0 rt_sigaction(SIGUSR1, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGUSR1, {0x804ae70, [], 0x400}, NULL, 8) = 0 old_mmap(NULL, 1052672, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x4010 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576 write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576 * BIG ASS SNIP ** read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576 write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = -1 EFBIG (File too large) --- SIGXFSZ (File size limit exceeded) --- +++ killed by SIGXFSZ +++ Now, notice the beginning file creation call. It starts out with O_LARGEFILE but ends with EFBIG. Since I'm not totally familiar with the kernel code I could be wrong on my next statement and if I am, please tell me, but it looks like it changes the file creation call from LARGEFILE to EFBIG (or is this just the error call itself?) Now, the kernel is supposed to be able to handle creating a 4TB file(?), so 1.0GB should be nothing to it. NOTHING changed betwen it working and not working. No hardware changes, no software additions, no recompiles of existing applications/daemons.. nothing. So, my question is now one of "What gives?" Any clues on how I can check to see what's going wrong? 
Is my gut feeling that it's changing the file type wrong? (IIUC, there are different open() calls for different size files? No, I have nothing to base this on, just something I flashed on and thought might explain the problem.) I'm learning here guys, so please be gentle. You folks are the only ones I have with the experience to tell me when I'm just fscked in the head and when I'm bang on. -- David D.W. Downey - RHCE Consulting Engineer Ensim Corporation - Sunnyvale, CA - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at
Re: [OTP] SMP board recommendations?
Hi David, Just to let you and the rest of the world in on a secret, 'ASL, Inc.' is the premier ATA server system builder. Jeff Nguyen is the only person that I knew two years ago that was a pioneer and I have shared some information with him before in the past, but here is ATA and it is here to stay. Cheers, Andre Hedrick Linux ATA Development ASL Kernel Development - ASL, Inc. Toll free: 1-877-ASL-3535 1757 Houret Court Fax: 1-408-941-2071 Milpitas, CA 95035 Web: www.aslab.com *** shameless toys of creation to challenge the GB/$$ *** http://www.aslab.com/contents/servers/Sovereign-3400T.html http://www.aslab.com/contents/servers/Sovereign-3450T.html On Thu, 15 Feb 2001, David D.W. Downey wrote: Thank you all for your response. Andre (ASL), thanks for the assist. Laurie and Janine took care of me. Asus CUV4X-D mobo with 1GB of buffered ECC RAM. I'm in the process of transferring all the hardware to the new board. I'll let you know if this new board solves the APIC errors and the random lockups under heavy I/O problems. I do have one more problem that I just can NOT track down. 2.4.1-ac10 kernel on the old Abit VP6 mobo. I'm getting curious errors from the 2.4.1, 2.4.1-ac10, and 2.4.2-pre[#] kernels. I've been using dd if=/dev/zero of=/tmp/testdd.img bs=1024k count=1500 for testing of I/O on the various boards I have here. Now, the funny part is that I get "file size limit exceeded" at around 1.0GB. I was getting this under the 2.4.2-pre# kernels so I switched to straight 2.4.1 and got the same problem. I switched to the 2.4.1-ac# line and the problem disappeared. Guess what? It's baaacckk! 
So, I did a strace of the dd command and got the following from it execve("/bin/dd", ["dd", "if=/dev/zero", "of=/tmp/testing.img", "bs=1024k", "count=1500"], [/* 22 vars */]) = 0 brk(0) = 0x804e7b8 open("/etc/ld.so.preload", O_RDONLY)= -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=7852, ...}) = 0 old_mmap(NULL, 7852, PROT_READ, MAP_PRIVATE, 3, 0) = 0x40015000 close(3)= 0 open("/lib/libc.so.6", O_RDONLY)= 3 fstat(3, {st_mode=S_IFREG|0755, st_size=1183326, ...}) = 0 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200\215"..., 4096) = 4096 old_mmap(NULL, 947548, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x40017000 mprotect(0x400f7000, 30044, PROT_NONE) = 0 old_mmap(0x400f7000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0xdf000) = 0x400f7000 old_mmap(0x400fb000, 13660, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x400fb000 close(3)= 0 old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x400ff000 mprotect(0x40017000, 917504, PROT_READ|PROT_WRITE) = 0 mprotect(0x40017000, 917504, PROT_READ|PROT_EXEC) = 0 munmap(0x40015000, 7852)= 0 personality(PER_LINUX) = 0 getpid()= 195 brk(0) = 0x804e7b8 brk(0x804e7f0) = 0x804e7f0 brk(0x804f000) = 0x804f000 open("/dev/zero", O_RDONLY|O_LARGEFILE) = 3 open("/tmp/testing.img", O_RDWR|O_CREAT|O_TRUNC|O_LARGEFILE, 0666) = 4 rt_sigaction(SIGINT, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGINT, {0x804ada8, [], 0x400}, NULL, 8) = 0 rt_sigaction(SIGQUIT, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGQUIT, {0x804ada8, [], 0x400}, NULL, 8) = 0 rt_sigaction(SIGPIPE, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGPIPE, {0x804ada8, [], 0x400}, NULL, 8) = 0 rt_sigaction(SIGUSR1, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGUSR1, {0x804ae70, [], 0x400}, NULL, 8) = 0 old_mmap(NULL, 1052672, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x4010 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) 
= 1048576 write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576 * BIG ASS SNIP ** read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576 write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = -1 EFBIG (File too large) --- SIGXFSZ (File size limit exceeded) --- +++ killed by SIGXFSZ +++ Now, notice the beginning file creation call. It starts out with O_LARGEFILE but ends with EFBIG. Since I'm not totally familiar with the kernel code I could be wrong on my next statement and if I am, please tell me, but it looks like it changes the file creation call from LARGEFILE to EFBIG (or is this just the error call itself?) Now, the kernel is supposed to be able to handle creating a 4TB file(?), so