Here some additional information on the issue: Stack-trace core0 converted from [http://bytelabs.org/openbsd-6.0-bug-report/0-panic.png] using OCR: > OpenBSD/amd64 (openbsd.bytelabs.org) (ttyC0) > login: mode = 0100644, inum = 9348113, fs = /home > panic: ffs_valloc: dup alloc > Stopped at Debugger+0x9: leave > TID PID UID PRFLAGS PFLAGS CPU COMMAND > * 64363 64363 1000 0x100003 0 0 tar > 50876 50876 0 0x100003 0 2 tar > 34099 34099 73 0x100010 0x80 1 syslogd > Debugger() at Debugger+ox9 > panic() at panic+0xfe > ffs_inode_alloc() at ffs_inode_alloc+0x408 > ufs_makeinode() at ufs_makeinode+0x85 > ufs_create() at ufs_create+0x34 > VOP_CREATE() at VOP_CREATE+0x3d > vn_open() at vn_open+0x2cd > dnoopenat() at doopenat+0x187 > syscall() at syscall+0x27b > --- syscall (number 5) --- > end of kernel > end trace frame: 0x7f7ffffc8494, count: 6 > 0x1441964373ca: > http://www.openbsd.org/ddb.html describes the minimum info required in bug > Reports. Insufficient info makes it difficult to find and fix bugs. > ddb{0}>
Stack-trace core2 converted from [http://bytelabs.org/openbsd-6.0-bug-report/3-core2%20trace.png] using OCR: > ddb{1}> mach ddbcpu 2 > Stopped at Debugger+0x9: leave > Debugger() at Debugger+0x9 > x86_ipi_handler() at x86_ipi_handler+0x76 > Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1c > --- interrupt --- > __mp_lock() at __mp_lock+0x42 > __mp_acquire_count() at __mp_acquire_count+0x2b > mi_switch() at mi_switch+0x21e > sleep_finish() at sleep_finish+0xb1 > tsleep() at tsleep+0x154 > biowait() at biowait+0x46 > bwrite() at bwrite+0x10d > ffs_update() at ffs_update+0x2bd > ufs_makeinode() at ufs_makeinode+0x1da > ufs_create() at ufs_create+0x34 > VOP_CREATE() at VOP_CREATE+ox3d > end trace frame: 0xffff80002224ec30, count: 0 > ddb{2}> From these two traces we see that both cores are executing ufs_create() which then calls ufs_makeinode(). Skimming over ufs_makeinode() we see that core0 is executing ffs_inode_alloc() which is one of the first functions called by ufs_makeinode(). Core2 is executing ffs_update() which happens *after* having executed ffs_inode_alloc(). Here is the function in question with some highlighting: > int > ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, > struct componentname *cnp) > { > struct inode *ip, *pdir; > struct direct newdir; > struct vnode *tvp; > int error; > > pdir = VTOI(dvp); > #ifdef DIAGNOSTIC > if ((cnp->cn_flags & HASBUF) == 0) > panic("ufs_makeinode: no name"); > #endif > *vpp = NULL; > if ((mode & IFMT) == 0) > mode |= IFREG; > > if ((error = UFS_INODE_ALLOC(pdir, mode, cnp->cn_cred, &tvp)) != 0) { Core0 is currently executing UFS_INODE_ALLOC aka ffs_inode_alloc() in this context. Core2 has already executed this function. > pool_put(&namei_pool, cnp->cn_pnbuf); > vput(dvp); > return (error); > } > > ip = VTOI(tvp); > > DIP_ASSIGN(ip, gid, DIP(pdir, gid)); > DIP_ASSIGN(ip, uid, cnp->cn_cred->cr_uid); > > if ((error = getinoquota(ip)) || > (error = ufs_quota_alloc_inode(ip, cnp->cn_cred))) { > pool_put(&namei_pool, cnp->cn_pnbuf); > UFS_INODE_FREE(ip, ip->i_number, mode); > vput(tvp); > vput(dvp); > return (error); > } > > ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; > DIP_ASSIGN(ip, mode, mode); > tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ > ip->i_effnlink = 1; > DIP_ASSIGN(ip, nlink, 1); > if (DOINGSOFTDEP(tvp)) > softdep_change_linkcnt(ip, 0); > if ((DIP(ip, mode) & ISGID) && > !groupmember(DIP(ip, gid), cnp->cn_cred) && > suser_ucred(cnp->cn_cred)) > DIP_AND(ip, mode, ~ISGID); > > /* > * Make sure inode goes to disk before directory entry. > */ > if ((error = UFS_UPDATE(ip, !DOINGSOFTDEP(tvp))) != 0) Core2 is currently executing UFS_UPDATE aka ffs_update() > goto bad; > > ufs_makedirentry(ip, cnp, &newdir); > if ((error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL)) != 0) > goto bad; > > if ((cnp->cn_flags & SAVESTART) == 0) > pool_put(&namei_pool, cnp->cn_pnbuf); > vput(dvp); > *vpp = tvp; > return (0); > > bad: > /* > * Write error occurred trying to update the inode > * or the directory so must deallocate the inode. > */ > pool_put(&namei_pool, cnp->cn_pnbuf); > vput(dvp); > ip->i_effnlink = 0; > DIP_ASSIGN(ip, nlink, 0); > ip->i_flag |= IN_CHANGE; > if (DOINGSOFTDEP(tvp)) > softdep_change_linkcnt(ip, 0); > tvp->v_type = VNON; > vput(tvp); > > return (error); > } The actual panic happens in the ffs_inode_alloc() following function, see highlighted code: > int > ffs_inode_alloc(struct inode *pip, mode_t mode, struct ucred *cred, > struct vnode **vpp) > { > static struct timeval fsnoinodes_last; > struct vnode *pvp = ITOV(pip); > struct fs *fs; > struct inode *ip; > ufsino_t ino, ipref; > int cg, error; > > *vpp = NULL; > fs = pip->i_fs; > if (fs->fs_cstotal.cs_nifree == 0) > goto noinodes; > > if ((mode & IFMT) == IFDIR) > ipref = ffs_dirpref(pip); > else > ipref = pip->i_number; > if (ipref >= fs->fs_ncg * fs->fs_ipg) > ipref = 0; > cg = ino_to_cg(fs, ipref); > > /* > * Track number of dirs created one after another > * in a same cg without intervening by files. > */ > if ((mode & IFMT) == IFDIR) { > if (fs->fs_contigdirs[cg] < 255) > fs->fs_contigdirs[cg]++; > } else { > if (fs->fs_contigdirs[cg] > 0) > fs->fs_contigdirs[cg]--; > } > ino = (ufsino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg); > if (ino == 0) > goto noinodes; > error = VFS_VGET(pvp->v_mount, ino, vpp); > if (error) { > ffs_inode_free(pip, ino, mode); > return (error); > } > > ip = VTOI(*vpp); > > if (DIP(ip, mode)) { > printf("mode = 0%o, inum = %u, fs = %s\n", > DIP(ip, mode), ip->i_number, fs->fs_fsmnt); > panic("ffs_valloc: dup alloc"); This is where we bail out! The following field is expected to be zero but isn't for some reason: struct ufs2_dinode { u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ int16_t di_nlink; /* 2: File link count. */ u_int32_t di_uid; /* 4: File owner. */ ... }; The following show what values di_mode can have I believe: /* File permissions. */ #define IEXEC 0000100 /* Executable. */ #define IWRITE 0000200 /* Writeable. */ #define IREAD 0000400 /* Readable. */ #define ISVTX 0001000 /* Sticky bit. */ #define ISGID 0002000 /* Set-gid. */ #define ISUID 0004000 /* Set-uid. */ /* File types. */ #define IFMT 0170000 /* Mask of file type. */ #define IFIFO 0010000 /* Named pipe (fifo). */ #define IFCHR 0020000 /* Character device. */ #define IFDIR 0040000 /* Directory file. */ #define IFBLK 0060000 /* Block device. */ #define IFREG 0100000 /* Regular file. */ #define IFLNK 0120000 /* Symbolic link. */ #define IFSOCK 0140000 /* UNIX domain socket. */ #define IFWHT 0160000 /* Whiteout. */ > } > > if (DIP(ip, blocks)) { > printf("free inode %s/%d had %lld blocks\n", > fs->fs_fsmnt, ino, (long long)DIP(ip, blocks)); > DIP_ASSIGN(ip, blocks, 0); > } > > DIP_ASSIGN(ip, flags, 0); > > /* > * Set up a new generation number for this inode. > * XXX - just increment for now, this is wrong! (millert) > * Need a way to preserve randomization. > */ > if (DIP(ip, gen) != 0) > DIP_ADD(ip, gen, 1); > if (DIP(ip, gen) == 0) > DIP_ASSIGN(ip, gen, arc4random() & INT_MAX); > > if (DIP(ip, gen) == 0 || DIP(ip, gen) == -1) > DIP_ASSIGN(ip, gen, 1); /* Shouldn't happen */ > > return (0); > > noinodes: > if (ratecheck(&fsnoinodes_last, &fserr_interval)) { > ffs_fserr(fs, cred->cr_uid, "out of inodes"); > uprintf("\n%s: create/symlink failed, no inodes free\n", > fs->fs_fsmnt); > } > return (ENOSPC); > } It looks like Core0 tries to allocate something that is currently being processed/updated by Core2. HTH Cheers, Igor On 7 Sep 2016, at 02:12, Igor Boehm <[email protected]> wrote: > This bug-report with all information can also be found here: > - http://bytelabs.org/openbsd-6.0-bug-report/ > > 1. The exact sequence of steps from startup necessary to reproduce the > problem: > - Boot freshly installed OpenBSD6.0/amd64/bsd.mp system running on the > following vServer [https://www.hetzner.de/lt/hosting/produkte_vserver/cx50] > - Login via SSH > - Copy ports.tgz into /tmp > - Extract /tmp/ports.tgz into /usr > - Second login via SSH > - Extract /tmp/ports.tgz into /home > - Kernel panic occurs > > 2. The output you got: > 2.1 Panic Message: [panic.png] > 2.2 Traces for Core0-Core3: [core0 trace.png ... core3 trace.png] > 2.3 PS output: [ps output 0.png ... ps output 2.png] > 2.4 Output of show uvm: [show uvm.png] > 2.5 Output of show bcstats: [show bcstats.png] > 2.6 mount output (below) > 2.7 /etc/fstab contents (below) > 2.8 /var/run/dmesg.boot contents (below) > > 3. Probably completely wrong guess at what the problem is but I try anyway: > - Core0 is executing ufs_makeinode() without acquiring any locks (i.e. I do > not see any __mp_lock() in the trace) [core0 trace.png]. > > - Core2 is executing ufs_makeinode() as well *at the same time* [core2 > trace.png] and it seems to include some form of synchronisation (i.e. > __mp_lock()). > > - It seems like a race condition leading to a corrupted data-structure. > > $ mount > /dev/sd0a on / type ffs (local) > /dev/sd0k on /home type ffs (local, nodev, nosuid) > /dev/sd0d on /tmp type ffs (local, nodev, nosuid) > /dev/sd0f on /usr type ffs (local, nodev) > /dev/sd0g on /usr/X11R6 type ffs (local, nodev) > /dev/sd0h on /usr/local type ffs (local, nodev, wxallowed) > /dev/sd0j on /usr/obj type ffs (local, nodev, nosuid) > /dev/sd0i on /usr/src type ffs (local, nodev, nosuid) > /dev/sd0e on /var type ffs (local, nodev, nosuid) > > $ cat /etc/fstab > d7be661125b20dff.b none swap sw > d7be661125b20dff.a / ffs rw 1 1 > d7be661125b20dff.k /home ffs rw,nodev,nosuid 1 2 > d7be661125b20dff.d /tmp ffs rw,nodev,nosuid 1 2 > d7be661125b20dff.f /usr ffs rw,nodev 1 2 > d7be661125b20dff.g /usr/X11R6 ffs rw,nodev 1 2 > d7be661125b20dff.h /usr/local ffs rw,wxallowed,nodev 1 2 > d7be661125b20dff.j /usr/obj ffs rw,nodev,nosuid 1 2 > d7be661125b20dff.i /usr/src ffs rw,nodev,nosuid 1 2 > d7be661125b20dff.e /var ffs rw,nodev,nosuid 1 2 > > $ cat /var/run/dmesg.boot > OpenBSD 6.0 (GENERIC.MP) #2319: Tue Jul 26 13:00:43 MDT 2016 > [email protected]:/usr/src/sys/arch/amd64/compile/GENERIC.MP > real mem = 17162960896 (16367MB) > avail mem = 16638332928 (15867MB) > mpath0 at root > scsibus0 at mpath0: 256 targets > mainbus0 at root > bios0 at mainbus0: SMBIOS rev. 2.8 @ 0xf62e0 (10 entries) > bios0: vendor SeaBIOS version "1.8.2" date 04/01/2014 > bios0: Hetzner vServer > acpi0 at bios0: rev 0 > acpi0: sleep states S5 > acpi0: tables DSDT FACP APIC HPET > acpi0: wakeup devices > acpitimer0 at acpi0: 3579545 Hz, 24 bits > acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat > cpu0 at mainbus0: apid 0 (boot processor) > cpu0: Intel(R) Xeon(R) CPU E5-26xx (Sandy Bridge), 2100.31 MHz > cpu0: > FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,SSSE3,CX16,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,HV,NXE,LONG,LAHF,ARAT > cpu0: 64KB 64b/line 2-way I-cache, 64KB 64b/line 2-way D-cache, 512KB > 64b/line 16-way L2 cache > cpu0: ITLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped > cpu0: DTLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped > cpu0: smt 0, core 0, package 0 > mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges > cpu0: apic clock running at 999MHz > cpu1 at mainbus0: apid 1 (application processor) > cpu1: Intel(R) Xeon(R) CPU E5-26xx (Sandy Bridge), 2100.03 MHz > cpu1: > FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,SSSE3,CX16,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,HV,NXE,LONG,LAHF,ARAT > cpu1: 64KB 64b/line 2-way I-cache, 64KB 64b/line 2-way D-cache, 512KB > 64b/line 16-way L2 cache > cpu1: ITLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped > cpu1: DTLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped > cpu1: smt 0, core 1, package 0 > cpu2 at mainbus0: apid 2 (application processor) > cpu2: Intel(R) Xeon(R) CPU E5-26xx (Sandy Bridge), 2100.01 MHz > cpu2: > FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,SSSE3,CX16,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,HV,NXE,LONG,LAHF,ARAT > cpu2: 64KB 64b/line 2-way I-cache, 64KB 64b/line 2-way D-cache, 512KB > 64b/line 16-way L2 cache > cpu2: ITLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped > cpu2: DTLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped > cpu2: smt 0, core 2, package 0 > cpu3 at mainbus0: apid 3 (application processor) > cpu3: Intel(R) Xeon(R) CPU E5-26xx (Sandy Bridge), 2100.01 MHz > cpu3: > FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,SSSE3,CX16,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,HV,NXE,LONG,LAHF,ARAT > cpu3: 64KB 64b/line 2-way I-cache, 64KB 64b/line 2-way D-cache, 512KB > 64b/line 16-way L2 cache > cpu3: ITLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped > cpu3: DTLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped > cpu3: smt 0, core 3, package 0 > ioapic0 at mainbus0: apid 0 pa 0xfec00000, version 11, 24 pins > acpihpet0 at acpi0: 100000000 Hz > acpiprt0 at acpi0: bus 0 (PCI0) > acpicpu0 at acpi0: C1(@1 halt!) > acpicpu1 at acpi0: C1(@1 halt!) > acpicpu2 at acpi0: C1(@1 halt!) > acpicpu3 at acpi0: C1(@1 halt!) > "ACPI0006" at acpi0 not configured > "PNP0303" at acpi0 not configured > "PNP0F13" at acpi0 not configured > "PNP0700" at acpi0 not configured > "PNP0501" at acpi0 not configured > "PNP0A06" at acpi0 not configured > "PNP0A06" at acpi0 not configured > "QEMU0002" at acpi0 not configured > "PNP0A06" at acpi0 not configured > pvbus0 at mainbus0: KVM > pci0 at mainbus0 bus 0 > pchb0 at pci0 dev 0 function 0 "Intel 82441FX" rev 0x02 > pcib0 at pci0 dev 1 function 0 "Intel 82371SB ISA" rev 0x00 > pciide0 at pci0 dev 1 function 1 "Intel 82371SB IDE" rev 0x00: DMA, channel 0 > wired to compatibility, channel 1 wired to compatibility > pciide0: channel 0 disabled (no drives) > atapiscsi0 at pciide0 channel 1 drive 0 > scsibus1 at atapiscsi0: 2 targets > cd0 at scsibus1 targ 0 lun 0: <QEMU, QEMU DVD-ROM, 2.5+> ATAPI 5/cdrom > removable > cd0(pciide0:1:0): using PIO mode 4, DMA mode 2 > uhci0 at pci0 dev 1 function 2 "Intel 82371SB USB" rev 0x01: apic 0 int 11 > piixpm0 at pci0 dev 1 function 3 "Intel 82371AB Power" rev 0x03: apic 0 int 9 > iic0 at piixpm0 > vga1 at pci0 dev 2 function 0 "Bochs VGA" rev 0x02 > wsdisplay0 at vga1 mux 1: console (80x25, vt100 emulation) > wsdisplay0: screen 1-5 added (80x25, vt100 emulation) > virtio0 at pci0 dev 3 function 0 "Qumranet Virtio Network" rev 0x00 > vio0 at virtio0: address 52:54:a2:01:85:cd > virtio0: msix shared > virtio1 at pci0 dev 4 function 0 "Qumranet Virtio SCSI" rev 0x00 > vioscsi0 at virtio1: qsize 128 > scsibus2 at vioscsi0: 255 targets > sd0 at scsibus2 targ 0 lun 0: <QEMU, QEMU HARDDISK, 2.5+> SCSI3 0/direct fixed > sd0: 390625MB, 512 bytes/sector, 800000000 sectors, thin > virtio1: msix shared > virtio2 at pci0 dev 5 function 0 "Qumranet Virtio Memory" rev 0x00 > viomb0 at virtio2 > virtio2: apic 0 int 10 > isa0 at pcib0 > isadma0 at isa0 > fdc0 at isa0 port 0x3f0/6 irq 6 drq 2 > fd0 at fdc0 drive 1: density unknown > com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo > pckbc0 at isa0 port 0x60/5 irq 1 irq 12 > pckbd0 at pckbc0 (kbd slot) > wskbd0 at pckbd0: console keyboard, using wsdisplay0 > pms0 at pckbc0 (aux slot) > wsmouse0 at pms0 mux 0 > pcppi0 at isa0 port 0x61 > spkr0 at pcppi0 > usb0 at uhci0: USB revision 1.0 > uhub0 at usb0 "Intel UHCI root hub" rev 1.00/1.00 addr 1 > nvram: invalid checksum > uhidev0 at uhub0 port 1 configuration 1 interface 0 "QEMU QEMU USB Tablet" > rev 2.00/0.00 addr 2 > uhidev0: iclass 3/0 > ums0 at uhidev0: 3 buttons, Z dir > wsmouse1 at ums0 mux 0 > vscsi0 at root > scsibus3 at vscsi0: 256 targets > softraid0 at root > scsibus4 at softraid0: 256 targets > root on sd0a (d7be661125b20dff.a) swap on sd0b dump on sd0b > clock: unknown CMOS layout > > If needed I can provide access to the machine for further testing. > > Thanks in advance for your help. > > Cheers, > Igor >
