advise where to post if this is not appropriate.

An amd64/SMP server of mine worked quite reliably for some time.
Then I added two more disks and created a gmirror with them; from that point on it has experienced lockups, crashes and panics.

I use amd64/SMP+gmirror+gstripe+geli and it has worked stably for a long time.
The problem is probably somewhere else.

Since it was running 6.2 at the time, I immediately upgraded to 6.3: this did not solve the problem but, at least, the box would now reboot and give me a crash dump.

So here it is: it looks like the I/O subsystem has problems, so the two disks might really be related, but still, I'm not sure.



# kgdb kernel.debug /var/crash/vmcore.5
[GDB will not be able to debug user-mode threads: /usr/lib/libthread_db.so: Undefined symbol "ps_pglobal_lookup"]
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "amd64-marcel-freebsd".

Unread portion of the kernel message buffer:


Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address   = 0x50006
fault code              = supervisor read data, page not present
instruction pointer     = 0x8:0xffffffff8020e076
stack pointer           = 0x10:0xffffffffa831c7a0
frame pointer           = 0x10:0xffffffffa831c7e0
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 88587 (clamscan)
trap number             = 12
panic: page fault
cpuid = 1
Uptime: 3d8h32m39s
Dumping 1023 MB (2 chunks)
  chunk 0: 1MB (151 pages) ... ok
  chunk 1: 1023MB (261744 pages) 1007 991 975 959 943
<110>ipfw: 65534 Deny TCP 192.168.101.1:58319 192.168.101.4:54663 in via fxp0 <110>ipfw: 65534 Deny TCP 192.168.101.1:58319 192.168.101.4:54663 in via fxp0 927 911 895 879 863 847 831 815 799 783 767 751 735 719 703 687 671 655 639 623 607 591 575 559 543 527 511 495 479 463 447 431 415 399 383 367 351 335 319 303 287 271 255 239 223 207 191 175panic: ahd_run_qoutfifo recursion
cpuid = 1
 159 143 127 111 95 79 63 47 31 15

#0  doadump () at pcpu.h:172
172             __asm __volatile("movq %%gs:0,%0" : "=r" (td));
(kgdb) bt
#0  doadump () at pcpu.h:172
#1 0xffffffff80257115 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:409
#2 0xffffffff80257825 in panic (fmt=0xffffff00110e9980 "X\023\021\021") at /usr/src/sys/kern/kern_shutdown.c:565
#3 0xffffffff803b50c6 in trap_fatal (frame=0xc, eva=18446742974484093312) at /usr/src/sys/amd64/amd64/trap.c:669
#4 0xffffffff803b546d in trap_pfault (frame=0xffffffffa831c6f0, usermode=0) at /usr/src/sys/amd64/amd64/trap.c:580
#5  0xffffffff803b56cd in trap (frame=
{tf_rdi = -1098891843040, tf_rsi = -1098516260992, tf_rdx = -1098692440992, tf_rcx = 1, tf_r8 = 0, tf_r9 = 327686, tf_rax = 2048, tf_rbx = -1098891843040, tf_rbp = -1473132576, tf_r10 = -1098978658048, tf_r11 = -1098938410752, tf_r12 = -1098516260992, tf_r13 = 327686, tf_r14 = -1098891842864, tf_r15 = -1705935624, tf_trapno = 12, tf_addr = 327686, tf_flags = 1108101564416, tf_err = 0, tf_rip = -2145329034, tf_cs = 8, tf_rflags = 66178, tf_rsp = -1473132624, tf_ss = 16}) at /usr/src/sys/amd64/amd64/trap.c:353
#6 0xffffffff8039c49b in calltrap () at /usr/src/sys/amd64/amd64/exception.S:168
#7 0xffffffff8020e076 in g_io_request (bp=0xffffff0024f12a20, cp=0xffffff003b541780) at /usr/src/sys/geom/geom_io.c:275
#8 0xffffffff803709ad in ufs_strategy (ap=0xffffff0024f12a20) at /usr/src/sys/ufs/ufs/ufs_vnops.c:1973
#9 0xffffffff803e5549 in VOP_STRATEGY_APV (vop=0xffffffff805702c0, a=0xffffffffa831c840) at vnode_if.c:1796
#10 0xffffffff802b510c in bufstrategy (bo=0xffffff0024f12a20, bp=0xffffff003b541780) at vnode_if.h:928
#11 0xffffffff802b4575 in breadn (vp=0xffffff000a55eba0, blkno=-1098516260992, size=819186784, rablkno=0x0, rabsize=0x0, cnt=0, cred=0x0, bpp=0x800) at buf.h:426
#12 0xffffffff802b48fe in bread (vp=0xffffff0024f12a20, blkno=-1098516260992, size=819186784, cred=0x1, bpp=0x0) at /usr/src/sys/kern/vfs_bio.c:723
#13 0xffffffff80363886 in ffs_read (ap=0xffffff0024f12a20) at /usr/src/sys/ufs/ffs/ffs_vnops.c:523
#14 0xffffffff803e3efa in VOP_READ_APV (vop=0x800, a=0xffffff003b541780) at vnode_if.c:643
#15 0xffffffff80370649 in ufs_readdir (ap=0xffffffffa831cad0) at vnode_if.h:343
#16 0xffffffff803e419d in VOP_READDIR_APV (vop=0x800, a=0xffffff003b541780) at vnode_if.c:1427
#17 0xffffffff802d0657 in getdirentries (td=0xffffff00110e9980, uap=0xffffffffa831cbc0) at vnode_if.h:746
#18 0xffffffff803b6052 in syscall (frame=
{tf_rdi = 4, tf_rsi = 58564608, tf_rdx = 4096, tf_rcx = 58550056, tf_r8 = 0, tf_r9 = 140737488347784, tf_rax = 196, tf_rbx = 58550016, tf_rbp = 58550016, tf_r10 = 34367908128, tf_r11 = 58626048, tf_r12 = 5320784, tf_r13 = 58550016, tf_r14 = 58540768, tf_r15 = 3, tf_trapno = 12, tf_addr = 34365898752, tf_flags = 31845, tf_err = 2, tf_rip = 34377717596, tf_cs = 43, tf_rflags = 582, tf_rsp = 140737488348456, tf_ss = 35}) at /usr/src/sys/amd64/amd64/trap.c:807
#19 0xffffffff8039c698 in Xfast_syscall () at /usr/src/sys/amd64/amd64/exception.S:287
#20 0x000000080112575c in ?? ()
Previous frame inner to this frame (corrupt stack?)


I guess what I should look into is this (see the arrow):

(kgdb) list
270 KASSERT(bp->bio_length % cp->provider->sectorsize == 0,
271                         ("wrong length %jd for sectorsize %u",
272                         bp->bio_length, cp->provider->sectorsize));
273             }
274
275 ------->>> g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
276                 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
277
278             bp->bio_from = cp;
279             bp->bio_to = pp;

(kgdb) p bp
$9 = (struct bio *) 0xffffff0024f12a20
(kgdb) p cp
$10 = (struct g_consumer *) 0xffffff003b541780
(kgdb) p pp
$11 = (struct g_provider *) 0x50006
(kgdb) p *bp
$12 = {bio_cmd = 1 '\001', bio_flags = 0 '\0', bio_cflags = 0 '\0', bio_pflags = 0 '\0', bio_dev = 0x0, bio_disk = 0x0, bio_offset = 6160384, bio_bcount = 0, bio_data = 0xffffffff9dfdc000 "[EMAIL PROTECTED]"\200~@", bio_error = 0, bio_resid = 0, bio_done = 0xffffffff802110c0 <g_vfs_done>, bio_driver1 = 0x0, bio_driver2 = 0x0, bio_caller1 = 0x0, bio_caller2 = 0xffffffff9a517cf8, bio_queue = {tqe_next = 0x0, tqe_prev = 0x0}, bio_attribute = 0x0, bio_from = 0x0, bio_to = 0x0, bio_length = 2048, bio_completed = 0, bio_children = 0, bio_inbed = 0, bio_parent = 0x0, bio_t0 = {sec = 0, frac = 0}, bio_task = 0, bio_task_arg = 0x0, bio_pblkno = 0}
(kgdb) p *cp
$13 = {geom = 0xffffff0030d3cc60, consumer = {le_next = 0x0, le_prev = 0xffffffff9a6718f8}, provider = 0xd0006, consumers = {le_next = 0xffffff003b541380, le_prev = 0xffffff002e4072a0}, acr = 680, acw = 0, ace = 6531640, spoiled = 0, stat = 0x0, nstart = 0, nend = 0, private = 0xffffff0002f8b3c0, index = 995365760}
(kgdb) p *pp
Cannot access memory at address 0x50006
(kgdb)


Can anyone provide some insight?



bye & Thanks
        av.

_______________________________________________
freebsd-questions@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-questions
To unsubscribe, send any mail to "[EMAIL PROTECTED]"


_______________________________________________
freebsd-questions@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-questions
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to