Hi,
I've managed to reproduce a hang by enabling pfsync over ipsec.
I believe that it might have to do with a feedback effect caused by
self-generated packets
that shouldn't be synced to the remote firewall.
The problem is also related to enc0, because if I disable isakmpd and
use syncdev vlan21 for pfsync
then there is no hang.
The system is the latest snapshot (01/03/2013) with a current kernel from CVS.
Giannis
details:
# ifconfig pfsync0 up
# ifconfig pfsync0 syncpeer 10.0.21.1 syncdev enc0
# ping 10.1.20.1
PING 10.1.20.1 (10.1.20.1): 56 data bytes
64 bytes from 10.1.20.1: icmp_seq=0 ttl=255 time=0.344 ms
64 bytes from 10.1.20.1: icmp_seq=1 ttl=255 time=0.210 ms
64 bytes from 10.1.20.1: icmp_seq=2 ttl=255 time=0.190 ms
64 bytes from 10.1.20.1: icmp_seq=3 ttl=255 time=0.188 ms
64 bytes from 10.1.20.1: icmp_seq=4 ttl=255 time=0.242 ms
64 bytes from 10.1.20.1: icmp_seq=5 ttl=255 time=0.344 ms
uvm_fault(0xd0a52660, 0x0, 0, 1) -> e
kernel: page fault trap, code=0
Stopped at ether_input+0xa9: testb $0x1,0(%edx)
ddb> show panic
the kernel did not panic
ddb> trace
ether_input(d2e8e030,0,d8e69800,7fe,2) at ether_input+0xa9
bge_rxeof(d2e8e000,d2e8ba80,12000,50,5) at bge_rxeof+0x1c2
bge_intr(d2e8e000) at bge_intr+0x15a
Xintr_ioapic2() at Xintr_ioapic2+0x69
--- interrupt ---
cpu_idle_cycle(d0b0e7c0) at cpu_idle_cycle+0xf
Bad frame pointer: 0xd0bc9e28
----- further info -----
# cd /usr/src/sys/arch/i386/compile/GENERIC
# rm if_ethersubr.o
# DEBUG=-g make if_ethersubr.o
# objdump --line --disassemble --reloc if_ethersubr.o > if_ethersubr.dis
# grep "<ether_input>": if_ethersubr.dis
00000f70 <ether_input>:
0xf70 + 0xa9 == 0x1019
# more if_ethersubr.dis
../../../../net/if_ethersubr.c:504
1016: 8b 55 c8 mov 0xffffffc8(%ebp),%edx
1019: f6 02 01 testb $0x1,(%edx)
101c: 0f 85 09 01 00 00 jne 112b <ether_input+0x1bb>
# cat -n
/usr/src/sys/arch/i386/compile/GENERIC/../../../../net/if_ethersubr.c |
head -n 504 | tail -n 1
504 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
------------------------
# ifconfig -a
bge0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
lladdr 00:0a:e4:84:41:a5
description: External
priority: 0
groups: egress
media: Ethernet autoselect (1000baseT full-duplex)
status: active
inet6 fe80::20a:e4ff:fe84:41a5%bge0 prefixlen 64 scopeid 0x1
inet 192.168.0.1 netmask 0xffffff00 broadcast 192.168.0.255
bge1: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
lladdr 00:0a:e4:84:41:a4
description: Internal
priority: 0
media: Ethernet autoselect (1000baseT full-duplex)
status: active
inet6 fe80::20a:e4ff:fe84:41a4%bge1 prefixlen 64 scopeid 0x2
enc0: flags=0<>
priority: 0
groups: enc
status: active
vlan21: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
lladdr 00:0a:e4:84:41:a4
description: pfsync
priority: 0
vlan: 21 parent interface: bge1
groups: vlan
status: active
inet6 fe80::20a:e4ff:fe84:41a4%vlan21 prefixlen 64 scopeid 0x5
inet 10.0.21.2 netmask 0xfffffffc broadcast 10.0.21.3
vlan20: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
lladdr 00:0a:e4:84:41:a4
description: TEST
priority: 0
vlan: 20 parent interface: bge1
groups: vlan
status: active
inet6 fe80::20a:e4ff:fe84:41a4%vlan20 prefixlen 64 scopeid 0x6
inet 10.1.20.2 netmask 0xffffff00 broadcast 10.1.20.255
ipsec.conf:
auth_algo="hmac-sha1"
enc_algo="aes-256"
dh_group="modp1024"
phase1_time="24h"
phase2_time="1h"
ike active esp from 10.0.21.2 to 10.0.21.1 peer 10.0.21.1 \
main auth $auth_algo enc $enc_algo group $dh_group lifetime
$phase1_time \
quick auth $auth_algo enc $enc_algo group $dh_group lifetime
$phase2_time
pf.conf:
set hostid 2
ext_if=bge0
sync_if=vlan21
test_if=vlan20
icmp_types="{echoreq unreach}"
set skip on {lo, enc0, $ext_if, $sync_if}
block
pass out quick on $test_if from ($test_if) modulate state
pass in quick inet proto icmp all icmp-type $icmp_types keep state (no-sync)
pass out log
block in quick log
If the 2nd rule is changed to
pass out quick on $test_if from ($test_if) modulate state (no-sync)
then there is no hang.
------------------------
ddb> ps
PID PPID PGRP UID S FLAGS WAIT COMMAND
7494 25640 7494 0 3 0x80 netio ping
8189 18748 18748 0 3 0x80 netio tcpdump
18748 18177 18748 76 3 0x80 bpf tcpdump
2027 8644 8644 0 3 0x80 netio tcpdump
8644 10277 8644 76 3 0x80 bpf tcpdump
18177 16909 18177 0 3 0x88 pause tcsh
16909 3984 16909 0 3 0x88 pause ksh
3984 27150 3984 0 3 0x80 select sshd
25640 10101 25640 0 3 0x88 pause tcsh
10277 16518 10277 0 3 0x88 pause tcsh
16518 9085 16518 0 3 0x88 pause ksh
9085 27150 9085 0 3 0x80 select sshd
10101 1 10101 0 3 0x88 pause ksh
25009 1 25009 0 3 0x80 ttyin getty
6478 1 6478 0 3 0x80 ttyin getty
26677 1 26677 0 3 0x80 ttyin getty
29147 1 29147 0 3 0x80 ttyin getty
19702 1 19702 0 3 0x80 ttyin getty
12022 1 12022 0 3 0x80 select cron
27150 1 27150 0 3 0x80 select sshd
31765 28197 28197 68 3 0x80 select isakmpd
28197 1 28197 0 3 0x80 netio isakmpd
28934 10010 6271 83 3 0x80 poll ntpd
10010 6271 6271 83 3 0x80 poll ntpd
6271 1 6271 0 3 0x80 poll ntpd
2673 14322 14322 74 3 0x80 bpf pflogd
14322 1 14322 0 3 0x80 netio pflogd
3327 8330 8330 73 2 0x80 syslogd
8330 1 8330 0 3 0x80 netio syslogd
15 0 0 0 3 0x100200 aiodoned aiodoned
14 0 0 0 3 0x100200 syncer update
13 0 0 0 3 0x100200 cleaner cleaner
12 0 0 0 3 0x100200 reaper reaper
11 0 0 0 3 0x100200 pgdaemon pagedaemon
10 0 0 0 3 0x100200 bored srdis
9 0 0 0 3 0x100200 bored crypto
8 0 0 0 3 0x100200 pftm pfpurge
7 0 0 0 3 0x100200 usbtsk usbtask
6 0 0 0 3 0x100200 usbatsk usbatsk
5 0 0 0 3 0x100200 acpi0 acpi0
4 0 0 0 3 0x100200 bored syswq
* 3 0 0 0 7 0x40100200 idle0
2 0 0 0 3 0x100200 kmalloc kmthread
1 0 1 0 3 0x80 wait init
0 -1 0 0 3 0x200 scheduler swapper
ddb> dmesg
OpenBSD 5.3-current (GENERIC) #0: Wed Mar 6 13:10:05 EET 2013
root@localhost:/usr/src/sys/arch/i386/compile/GENERIC
cpu0: Intel(R) Xeon(R) CPU 3040 @ 1.86GHz ("GenuineIntel" 686-class)
1.87 GHz
cpu0:
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,C
FLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,NXE,LONG,SSE3,DTES64,MWAIT,DS-CPL
,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,LAHF,PERF
real mem = 2145837056 (2046MB)
avail mem = 2099798016 (2002MB)
mainbus0 at root
bios0 at mainbus0: AT/286+ BIOS, date 05/10/07, BIOS32 rev. 0 @ 0xfd680,
SMBIOS
rev. 2.34 @ 0x7feee000 (53 entries)
bios0: vendor FUJITSU SIEMENS // Phoenix Technologies Ltd. version
"4.06 Rev. 1
.07.2532" date 05/10/2007
bios0: FUJITSU SIEMENS PRIMERGY RX100 S4
acpi0 at bios0: rev 2
acpi0: sleep states S0 S1 S4 S5
acpi0: tables DSDT FACP SPCR MCFG APIC BOOT
acpi0: wakeup devices PEXA(S4) PEXB(S4) PXH0(S4) PEXC(S4) PEXD(S4)
PCIH(S4) USB
1(S4) USB2(S4) USB3(S4) USB4(S4) USB5(S4) KEYB(S4) PS2M(S4) COM1(S1)
COM2(S1)
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpimcfg0 at acpi0 addr 0xf0000000, bus 0-14
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: apic clock running at 266MHz
cpu at mainbus0: not configured
ioapic0 at mainbus0: apid 2 pa 0xfec00000, version 20, 24 pins
ioapic1 at mainbus0: apid 3 pa 0xfec10000, version 20, 24 pins
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus 10 (PEXA)
acpiprt2 at acpi0: bus 1 (PEXB)
acpiprt3 at acpi0: bus 2 (PXH0)
acpiprt4 at acpi0: bus 7 (PEXC)
acpiprt5 at acpi0: bus 9 (PEXD)
acpiprt6 at acpi0: bus 15 (PCIH)
acpicpu0 at acpi0
acpibtn0 at acpi0: PWRB
bios0: ROM list: 0xc0000/0x8000
ipmi at mainbus0 not configured
cpu0: Enhanced SpeedStep disabled by BIOS
memory map conflict 0xfff00000/0x100000
pci0 at mainbus0 bus 0: configuration mode 1 (bios)
pchb0 at pci0 dev 0 function 0 "Intel E7230 Host" rev 0xc0
ppb0 at pci0 dev 1 function 0 "Intel E7230 PCIE" rev 0xc0: apic 2 int 16
pci1 at ppb0 bus 10
ppb1 at pci0 dev 28 function 0 "Intel 82801GB PCIE" rev 0x01: apic 2 int 17
pci2 at ppb1 bus 1
ppb2 at pci2 dev 0 function 0 "Intel 6702PXH PCIE-PCIX" rev 0x09
pci3 at ppb2 bus 2
ppb3 at pci0 dev 28 function 4 "Intel 82801G PCIE" rev 0x01: apic 2 int 17
pci4 at ppb3 bus 7
ppb4 at pci4 dev 0 function 0 "ServerWorks PCIE-PCIX" rev 0xb5
pci5 at ppb4 bus 8
bge0 at pci5 dev 4 function 0 "Broadcom BCM5715" rev 0xa3, BCM5715 A3
(0x9003):
apic 2 int 16, address 00:0a:e4:84:41:a5
brgphy0 at bge0 phy 1: BCM5714 10/100/1000baseT/SX PHY, rev. 0
bge1 at pci5 dev 4 function 1 "Broadcom BCM5715" rev 0xa3, BCM5715 A3
(0x9003):
apic 2 int 17, address 00:0a:e4:84:41:a4
brgphy1 at bge1 phy 1: BCM5714 10/100/1000baseT/SX PHY, rev. 0
ppb5 at pci0 dev 28 function 5 "Intel 82801G PCIE" rev 0x01: apic 2 int 16
pci6 at ppb5 bus 9
vga1 at pci6 dev 0 function 0 "Matrox MGA G200e (ServerEngines)" rev 0x02
wsdisplay0 at vga1 mux 1: console (80x25, vt100 emulation)
wsdisplay0: screen 1-5 added (80x25, vt100 emulation)
uhci0 at pci0 dev 29 function 0 "Intel 82801GB USB" rev 0x01: apic 2 int 23
uhci1 at pci0 dev 29 function 1 "Intel 82801GB USB" rev 0x01: apic 2 int 22
uhci2 at pci0 dev 29 function 2 "Intel 82801GB USB" rev 0x01: apic 2 int 21
uhci3 at pci0 dev 29 function 3 "Intel 82801GB USB" rev 0x01: apic 2 int 20
ehci0 at pci0 dev 29 function 7 "Intel 82801GB USB" rev 0x01: apic 2 int 23
usb0 at ehci0: USB revision 2.0
uhub0 at usb0 "Intel EHCI root hub" rev 2.00/1.00 addr 1
ppb6 at pci0 dev 30 function 0 "Intel 82801BA Hub-to-PCI" rev 0xe1
pci7 at ppb6 bus 15
ichpcib0 at pci0 dev 31 function 0 "Intel 82801GB LPC" rev 0x01: PM disabled
pciide0 at pci0 dev 31 function 1 "Intel 82801GB IDE" rev 0x01: DMA,
channel 0 c
onfigured to compatibility, channel 1 configured to compatibility
atapiscsi0 at pciide0 channel 0 drive 0
scsibus0 at atapiscsi0: 2 targets
cd0 at scsibus0 targ 0 lun 0: <HL-DT-ST, DVDRAM GSA-T20N, WW01> ATAPI
5/cdrom r
emovable
cd0(pciide0:0:0): using PIO mode 4, Ultra-DMA mode 2
pciide0: channel 1 disabled (no drives)
ahci0 at pci0 dev 31 function 2 "Intel 82801GR AHCI" rev 0x01: msi, AHCI 1.1
scsibus1 at ahci0: 32 targets
sd0 at scsibus1 targ 0 lun 0: <ATA, ST3160815AS, 3.AA> SCSI3 0/direct
fixed t10
.ATA_ST3160815AS_6RX19ARP
sd0: 152627MB, 512 bytes/sector, 312581808 sectors
sd1 at scsibus1 targ 1 lun 0: <ATA, ST3160815AS, 3.AA> SCSI3 0/direct
fixed t10
.ATA_ST3160815AS_6RX1CX8C
sd1: 152627MB, 512 bytes/sector, 312581808 sectors
ichiic0 at pci0 dev 31 function 3 "Intel 82801GB SMBus" rev 0x01: apic 2
int 19
iic0 at ichiic0
spdmem0 at iic0 addr 0x50: 1GB DDR2 SDRAM ECC PC2-5300CL5
spdmem1 at iic0 addr 0x52: 1GB DDR2 SDRAM ECC PC2-5300CL5
usb1 at uhci0: USB revision 1.0
uhub1 at usb1 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb2 at uhci1: USB revision 1.0
uhub2 at usb2 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb3 at uhci2: USB revision 1.0
uhub3 at usb3 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb4 at uhci3: USB revision 1.0
uhub4 at usb4 "Intel UHCI root hub" rev 1.00/1.00 addr 1
isa0 at ichpcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com0: console
pckbc0 at isa0 port 0x60/5
pckbd0 at pckbc0 (kbd slot)
pckbc0: using irq 1 for kbd slot
wskbd0 at pckbd0: console keyboard, using wsdisplay0
pms0 at pckbc0 (aux slot)
pckbc0: using irq 12 for aux slot
wsmouse0 at pms0 mux 0
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
npx0 at isa0 port 0xf0/16: reported by CPUID; using exception 16
mtrr: Pentium Pro MTRR support
vscsi0 at root
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
softraid0: trying to bring up sd2 degraded
sd2 at scsibus3 targ 1 lun 0: <OPENBSD, SR RAID 1, 005> SCSI2 0/direct fixed
sd2: 152625MB, 512 bytes/sector, 312576113 sectors
softraid0: roaming device -> sd1a
root on sd2a (aded2a972fcdcdc6.a) swap on sd2b dump on sd2b
ddb> boot reboot
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
splassert: assertwaitok: want -1 have 1
panic: kernel diagnostic assertion "p->p_wchan == NULL" failed: file
"../../../../kern/kern_sched.c", line 311
Stopped at Debugger+0x4: popl %ebp
RUN AT LEAST 'trace' AND 'ps' AND INCLUDE OUTPUT WHEN REPORTING THIS PANIC!
DO NOT EVEN BOTHER REPORTING THIS WITHOUT INCLUDING THAT INFORMATION!
ddb>
ddb> trace
Debugger(d08fdf3c,f53ece68,d08dc788,f53ece68,7fffffff) at Debugger+0x4
panic(d08dc788,d085fdae,d08da496,d08da478,137) at panic+0x5d
__assert(d085fdae,d08da478,137,d08da496,7fffffff) at __assert+0x2e
sched_chooseproc(d8e66c84,d8e67d14,f53ececc,d059aebc,d2e8d5d8) at
sched_choosep
roc+0x15a
mi_switch(b0,d08daaf9,f53ecf0c,f53ecefc,f53ecf20) at mi_switch+0xe5
sleep_finish(f53ecf20,1,20,d08daca7,d2ec4a00) at sleep_finish+0x98
msleep(d2ec7780,d2ec7790,20,d08daca7,0) at msleep+0x7b
workq_next_task(d2ec7780,d2ef4cfc,d02008bf,d2ec7780,d03f2d80) at
workq_next_tas
k+0x44
workq_thread(d2ec7780) at workq_thread+0x18
Bad frame pointer: 0xd0bc99b8
ddb> show panic
kernel diagnostic assertion "p->p_wchan == NULL" failed: file
"../../../../kern
/kern_sched.c", line 311