panic: kernel diagnostic assertion "skrev->reverse == NULL"

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
6 messages Options
Reply | Threaded
Open this post in threaded view
|

panic: kernel diagnostic assertion "skrev->reverse == NULL"

Johan Huldtgren-3
hello,

trying to connect to my gateway today I found the following
panic. This is 100% reproducible anytime I connect via
openvpn and then generate traffic. This first happened on
the Feb 7th snap, I updated and it happens on the latest
snap as well.

For completeness' sake, here is first all the info on the panic
on the Feb 7th snap; after the dmesg is all the same info on
the Feb 21st snap.

thanks,

.jh

panic: kernel diagnostic assertion "skrev->reverse == NULL" failed: file
"/usr/src/sys/net/pf.c", line 7270
Stopped at      db_enter+0x4:   popl    %ebp
     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*382011   8487    577        0x10          0    0  openvpn
db_enter() at db_enter+0x4
panic() at panic+0xcc
__assert(d09bedab,d0b648ec,1c66,d0b650d4) at __assert+0x19
pf_find_state(d24b9a00,f5274e10,2,d2574200) at pf_find_state+0x28d
pf_test_state(f5274e90,f5274e80,f5274e8e,0) at pf_test_state+0x104
pf_test(2,3,d24b8000,f5274f94) at pf_test+0xb63
ip_output(d2574200,0,f5275000,1,0,0,0) at ip_output+0x649
ip_forward(d2574200,d24b8000,d1f8aed8,0) at ip_forward+0x20a
ip_input_if(f5275194,f5275180,4,0,d24b8000) at ip_input_if+0x48e
ipv4_input(d24b8000,d2574200) at ipv4_input+0x2b
tun_dev_write(d24b8000,f52752c8,d1f99df0) at tun_dev_write+0x222
tunwrite(2800,f52752c8,11) at tunwrite+0x53
spec_write(f5275248) at spec_write+0x78
VOP_WRITE(d1f7de24,f52752c8,11,d202df00) at VOP_WRITE+0x42
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.

ddb> trace
db_enter() at db_enter+0x4
panic() at panic+0xcc
__assert(d09bedab,d0b648ec,1c66,d0b650d4) at __assert+0x19
pf_find_state(d24b9a00,f5274e10,2,d2574200) at pf_find_state+0x28d
pf_test_state(f5274e90,f5274e80,f5274e8e,0) at pf_test_state+0x104
pf_test(2,3,d24b8000,f5274f94) at pf_test+0xb63
ip_output(d2574200,0,f5275000,1,0,0,0) at ip_output+0x649
ip_forward(d2574200,d24b8000,d1f8aed8,0) at ip_forward+0x20a
ip_input_if(f5275194,f5275180,4,0,d24b8000) at ip_input_if+0x48e
ipv4_input(d24b8000,d2574200) at ipv4_input+0x2b
tun_dev_write(d24b8000,f52752c8,d1f99df0) at tun_dev_write+0x222
tunwrite(2800,f52752c8,11) at tunwrite+0x53
spec_write(f5275248) at spec_write+0x78
VOP_WRITE(d1f7de24,f52752c8,11,d202df00) at VOP_WRITE+0x42
vn_write(d200c0a0,d200c0b8,f52752c8,d202df00) at vn_write+0xc7
dofilewritev(d1f99df0,5,d200c0a0,cf7e8ca8,2,1,d200c0b8,f52753a0) at dofilewritev+0x1b1
sys_writev(d1f99df0,f52753a8,f52753a0) at sys_writev+0x3d
syscall() at syscall+0x1a0
--- syscall (number -813790040) ---
end of kernel
start_phys+0x12e:

ddb> ps
    PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
  76928   64690      1     77  3    0x100090  poll          dhcpd
  23952   86556      1     53  3        0x90  kqread        unbound
   1968  515165  46389     97  3    0x100090  kqread        nsd
  46389  389573  23179     97  3    0x100090  poll          nsd
  23179  418722      1     97  3    0x100090  kqread        nsd
  81052  413963      1      0  3    0x100083  ttyin         getty
  63997  254650      1      0  3    0x100098  poll          cron
  62652  411448      1    688  3        0x90  kqread        dnscrypt-proxy
  98530  289998  65915    623  3        0x90  nanosleep     zabbix_agentd
  28926  441573  65915    623  3        0x90  select        zabbix_agentd
  98291  298225  65915    623  3        0x90  netcon        zabbix_agentd
  84500  125667  65915    623  3        0x90  select        zabbix_agentd
  50361  228391  65915    623  3        0x90  nanosleep     zabbix_agentd
  65915  306901      1    623  3        0x90  wait          zabbix_agentd
  75273  505271      1    535  3        0x90  nanosleep     symon
  58494   77567      1      0  3        0x80  nanosleep     apcupsd
  58494  304290      1      0  3   0x4000088  sigwait       apcupsd
  58494    4517      1      0  3   0x4000080  netcon        apcupsd
  46137  427048      1    629  3        0x90  poll          avahi-daemon
  95643  255974      1    572  3        0x90  poll          dbus-daemon
  47067  140831      1    559  3        0x90  select        radiusd
  47067    9217      1    559  3   0x4000090  thrsleep      radiusd
  47067  233419      1    559  3   0x4000090  thrsleep      radiusd
  47067  216119      1    559  3   0x4000090  thrsleep      radiusd
  47067  436940      1    559  3   0x4000090  thrsleep      radiusd
  47067  330094      1    559  3   0x4000090  thrsleep      radiusd
  30490  406527      1     99  3    0x100090  poll          sndiod
  55647  501483      1    110  3    0x100090  poll          sndiod
  93612  318909      1     79  3    0x100090  kqread        tftpd
  58417  513464      1    109  3        0x90  kqread        ftp-proxy
  80195  311963  25066     95  3    0x100092  kqread        smtpd
  71856   32281  25066    103  3    0x100092  kqread        smtpd
  47599   73377  25066     95  3    0x100092  kqread        smtpd
  31309  355943  25066     95  3    0x100092  kqread        smtpd
  36099   15939  25066     95  3    0x100092  kqread        smtpd
   6158  125021  25066     95  3    0x100092  kqread        smtpd
  25066  391619      1      0  3    0x100080  kqread        smtpd
  62309  381885      1      0  3        0x80  select        sshd
  15015  347653      0      0  3     0x14200  acct          acct
  86153  377796  32544      0  3        0x80  netio         npppd
  32544  141225      1     82  3        0x90  kqread        npppd
  54974   47511   6756     68  3        0x90  select        isakmpd
   6756  306484      1      0  3        0x80  netio         isakmpd
  97342  213478      1      0  3    0x100080  poll          ntpd
  98965  130798      1     83  3    0x100092  poll          ntpd
  80824  400353  56647     74  3    0x100092  bpf           pflogd
  56647  438370      1      0  3        0x80  netio         pflogd
  90294  489541  21297     73  3    0x100090  kqread        syslogd
  21297  299442      1      0  3    0x100082  netio         syslogd
* 8487  382011      1    577  7        0x10                openvpn
  75343   57459  67614    115  3    0x100092  kqread        slaacd
  45698  163880  67614    115  3    0x100092  kqread        slaacd
  67614   35179      1      0  3        0x80  kqread        slaacd
  35433  486650      0      0  3     0x14200  pgzero        zerothread
  43494   91286      0      0  3     0x14200  aiodoned      aiodoned
  35517  241111      0      0  3     0x14200  syncer        update
  67937  360352      0      0  3     0x14200  cleaner       cleaner
  21567  447776      0      0  3     0x14200  reaper        reaper
  46256  141336      0      0  3     0x14200  pgdaemon      pagedaemon
  11515  237860      0      0  3     0x14200  bored         crynlk
  81477  351625      0      0  3     0x14200  bored         crypto
  59591  462324      0      0  3     0x14200  usbtsk        usbtask
   5609  355128      0      0  3     0x14200  usbatsk       usbatsk
  56311    8803      0      0  2     0x14200                softnet
  11278  311141      0      0  3     0x14200  bored         systqmp
  57512  318817      0      0  3     0x14200  bored         systq
  32409  236932      0      0  3  0x40014200  bored         softclock
   5458  422749      0      0  3  0x40014200                idle0
  66589  215285      0      0  3     0x14200  kmalloc       kmthread
      1  325751      0      0  3        0x82  wait          init
      0       0     -1      0  3     0x10200  scheduler     swapper

ddb> show registers
ds                  0x10
es                  0x10
fs                  0x20
gs                     0
edi           0xd0ae7be4        twe_pci_devices+0x358
esi                0x100
ebp           0xf5274d64
ebx           0xf5274d8c
edx                  0x5
ecx                    0
eax                  0x1
eip           0xd05032f4        db_enter+0x4
cs                   0x8
eflags          0x200202        start_phys+0x12e
esp           0xf5274d64
ss                  0x10
db_enter+0x4:   popl    %ebp

ddb> show mbuf
mbuf 0xd05032f4
m_type: 22103   m_flags: 458b<M_EXT,M_PKTHDR,M_EXTWR,M_ACAST,M_BCAST,M_CONF,M_COMP>
m_next: 0xccccc35d      m_nextpkt: 0xcccccccc
m_data: 0xcccccccc      m_len: 1407551829
m_dat: 0xd050330c       m_pktdat: 0xd0503348
m_ptkhdr.ph_ifidx: 4152489865   m_pkthdr.len: -1995940469
m_ptkhdr.ph_tags: 0x8408b0c     m_pkthdr.ph_tagsset: 47b<IPSEC_IN_DONE,IPSEC_OUT_DONE,IPSEC_OUT_CRYPTO_NEEDED,IPSEC_PENDING_TDB,BRIDGE,GIF>
m_pkthdr.ph_flowid: 53129       m_pkthdr.ph_loopcnt: 97
m_pkthdr.csum_flags: f781<IPV4_CSUM_OUT,UDP_CSUM_IN_OK,UDP_CSUM_IN_BAD,ICMP_CSUM_OUT,ICMP_CSUM_IN_OK>
m_pkthdr.ether_vtag: 25973      m_ptkhdr.ph_rtableid: 998862963
m_pkthdr.pf.statekey: 0x890c7b89        m_pkthdr.pf.inp 0x6df781c7
m_pkthdr.pf.qid: 2305713263     m_pkthdr.pf.tag: 2171
m_pkthdr.pf.flags: 81<GENERATED,PROCESSED>
m_pkthdr.pf.routed: 242 m_pkthdr.pf.prio: 101
m_ext.ext_buf: 0x81145389       m_ext.ext_size: 1684207233
m_ext.ext_free_fn: 273385838    m_ext.ext_arg: 0x657261f1
m_ext.ext_nextref: 0x73897465   m_ext.ext_prevref: 0x6573351c

ddb> trace /p 0t382011
end(f52752c8,11,d202df00,1,11) at 0xd1f7de24
Bad frame pointer: 0xd06a2a87

dmesg

OpenBSD 6.2-current (GENERIC) #397: Wed Feb  7 18:59:22 MST 2018
     [hidden email]:/usr/src/sys/arch/i386/compile/GENERIC
cpu0: Geode(TM) Integrated Processor by AMD PCS ("AuthenticAMD"
586-class) 500 MHz
cpu0: FPU,DE,PSE,TSC,MSR,CX8,SEP,PGE,CMOV,CFLUSH,MMX,MMXX,3DNOW2,3DNOW
real mem  = 536363008 (511MB)
avail mem = 512684032 (488MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: date 20/71/05, BIOS32 rev. 0 @ 0xfac40
pcibios0 at bios0: rev 2.0 @ 0xf0000/0x10000
pcibios0: pcibios_get_intr_routing - function not supported
pcibios0: PCI IRQ Routing information unavailable.
pcibios0: PCI bus #0 is the last bus
bios0: ROM list: 0xc8000/0xa800
cpu0 at mainbus0: (uniprocessor)
mtrr: K6-family MTRR support (2 registers)
amdmsr0 at mainbus0
pci0 at mainbus0 bus 0: configuration mode 1 (no bios)
0:20:0: io address conflict 0x6100/0x100
0:20:0: io address conflict 0x6200/0x200
pchb0 at pci0 dev 1 function 0 "AMD Geode LX" rev 0x31
glxsb0 at pci0 dev 1 function 2 "AMD Geode LX Crypto" rev 0x00: RNG AES
vr0 at pci0 dev 6 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 11,
address 00:00:24:c9:58:4c
ukphy0 at vr0 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
vr1 at pci0 dev 7 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 5,
address 00:00:24:c9:58:4d
ukphy1 at vr1 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
vr2 at pci0 dev 8 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 9,
address 00:00:24:c9:58:4e
ukphy2 at vr2 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
vr3 at pci0 dev 9 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 12,
address 00:00:24:c9:58:4f
ukphy3 at vr3 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
ral0 at pci0 dev 17 function 0 "Ralink RT2561S" rev 0x00: irq 15,
address 00:12:0e:61:7f:b0
ral0: MAC/BBP RT2561C, RF RT5225
glxpcib0 at pci0 dev 20 function 0 "AMD CS5536 ISA" rev 0x03: rev 3,
32-bit 3579545Hz timer, watchdog, gpio, i2c
gpio0 at glxpcib0: 32 pins
iic0 at glxpcib0
pciide0 at pci0 dev 20 function 2 "AMD CS5536 IDE" rev 0x01: DMA,
channel 0 wired to compatibility, channel 1 wired to compatibility
wd0 at pciide0 channel 0 drive 0: <SanDisk SDCFX4-8192>
wd0: 4-sector PIO, LBA, 7815MB, 16007040 sectors
wd0(pciide0:0:0): using PIO mode 4, Ultra-DMA mode 2
pciide0: channel 1 ignored (disabled)
ohci0 at pci0 dev 21 function 0 "AMD CS5536 USB" rev 0x02: irq 7,
version 1.0, legacy support
ehci0 at pci0 dev 21 function 1 "AMD CS5536 USB" rev 0x02: irq 7
usb0 at ehci0: USB revision 2.0
uhub0 at usb0 configuration 1 interface 0 "AMD EHCI root hub" rev
2.00/1.00 addr 1
isa0 at glxpcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com0: console
com1 at isa0 port 0x2f8/8 irq 3: ns16550a, 16 byte fifo
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbc0: unable to establish interrupt for irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
nsclpcsio0 at isa0 port 0x2e/2: NSC PC87366 rev 10: GPIO VLM TMS
gpio1 at nsclpcsio0: 29 pins
npx0 at isa0 port 0xf0/16: reported by CPUID; using exception 16
usb1 at ohci0: USB revision 1.0
uhub1 at usb1 configuration 1 interface 0 "AMD OHCI root hub" rev
1.00/1.00 addr 1
vscsi0 at root
scsibus1 at vscsi0: 256 targets
softraid0 at root
scsibus2 at softraid0: 256 targets
root on wd0a (ba730608caf94ae4.a) swap on wd0b dump on wd0b

Feb 21st panic:

panic: kernel diagnostic assertion "skrev->reverse == NULL" failed: file
"/usr/src/sys/net/pf.c", line 7277
Stopped at      db_enter+0x4:   popl    %ebp
     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*469986  67790    577        0x10          0    0  openvpn
db_enter() at db_enter+0x4
panic() at panic+0xcc
__assert(d09bf4e8,d09cf7d0,1c6d,d09cffbd) at __assert+0x19
pf_find_state(d2486f00,f528d8e0,2,d251b900) at pf_find_state+0x28d
pf_test_state(f528d960,f528d950,f528d95e,0) at pf_test_state+0x104
pf_test(2,3,d247d400,f528da64) at pf_test+0xb63
ip_output(d251b900,0,f528dad0,1,0,0,0) at ip_output+0x649
ip_forward(d251b900,d247d400,d201cb48,0) at ip_forward+0x20a
ip_input_if(f528dc64,f528dc50,4,0,d247d400) at ip_input_if+0x48e
ipv4_input(d247d400,d251b900) at ipv4_input+0x2b
tun_dev_write(d247d400,f528dd98,10001) at tun_dev_write+0x222
tunwrite(2800,f528dd98,11) at tunwrite+0x53
spec_write(f528dd18) at spec_write+0x78
VOP_WRITE(d1f76ad0,f528dd98,11,d2032ea0) at VOP_WRITE+0x48
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.

ddb> trace
db_enter() at db_enter+0x4
panic() at panic+0xcc
__assert(d09bf4e8,d09cf7d0,1c6d,d09cffbd) at __assert+0x19
pf_find_state(d2486f00,f528d8e0,2,d251b900) at pf_find_state+0x28d
pf_test_state(f528d960,f528d950,f528d95e,0) at pf_test_state+0x104
pf_test(2,3,d247d400,f528da64) at pf_test+0xb63
ip_output(d251b900,0,f528dad0,1,0,0,0) at ip_output+0x649
ip_forward(d251b900,d247d400,d201cb48,0) at ip_forward+0x20a
ip_input_if(f528dc64,f528dc50,4,0,d247d400) at ip_input_if+0x48e
ipv4_input(d247d400,d251b900) at ipv4_input+0x2b
tun_dev_write(d247d400,f528dd98,10001) at tun_dev_write+0x222
tunwrite(2800,f528dd98,11) at tunwrite+0x53
spec_write(f528dd18) at spec_write+0x78
VOP_WRITE(d1f76ad0,f528dd98,11,d2032ea0) at VOP_WRITE+0x48
vn_write(d200bdc0,d200bdd8,f528dd98,d2032ea0) at vn_write+0xc7
dofilewritev(d1f9816c,5,d200bdc0,cf7d1768,2,1,d200bdd8,f528de70) at dofilewritev+0x1b1
sys_writev(d1f9816c,f528de78,f528de70) at sys_writev+0x5a
syscall() at syscall+0x1a0
--- syscall (number -813885592) ---
end of kernel
start_phys+0x12e:

ddb> ps
    PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
  92831  185204      1    559  3        0x90  select        radiusd
  92831   23458      1    559  3   0x4000090  thrsleep      radiusd
  92831   63835      1    559  3   0x4000090  thrsleep      radiusd
  92831  301981      1    559  3   0x4000090  thrsleep      radiusd
  92831  144620      1    559  3   0x4000090  thrsleep      radiusd
  92831   95356      1    559  3   0x4000090  thrsleep      radiusd
  27295  168304      1     77  3    0x100090  poll          dhcpd
  45933  396142      1     53  3        0x90  kqread        unbound
  86521  276907      1      0  3    0x100083  ttyin         ksh
  78428   51758      1      0  3    0x100098  poll          cron
  33663  425597      1    688  3        0x90  kqread        dnscrypt-proxy
  96828  254184  45108    623  3        0x90  nanosleep     zabbix_agentd
  40869  297862  45108    623  3        0x90  select        zabbix_agentd
  79509  273818  45108    623  3        0x90  select        zabbix_agentd
  82823  467131  45108    623  3        0x90  netcon        zabbix_agentd
  84204  372276  45108    623  3        0x90  nanosleep     zabbix_agentd
  45108  515831      1    623  3        0x90  wait          zabbix_agentd
  41956   33317      1    535  3        0x90  nanosleep     symon
   5311  195329      1      0  3        0x80  nanosleep     apcupsd
   5311  246109      1      0  3   0x4000088  sigwait       apcupsd
   5311  514305      1      0  3   0x4000080  netcon        apcupsd
  97569  237508      1    629  3        0x90  poll          avahi-daemon
  85737   83396      1    572  3        0x90  poll          dbus-daemon
  37046  204512      1     99  3    0x100090  poll          sndiod
  43033  120305      1    110  3    0x100090  poll          sndiod
  76726  491665      1     79  3    0x100090  kqread        tftpd
  54415  422026      1    109  3        0x90  kqread        ftp-proxy
  67237  280723  74173     95  3    0x100092  kqread        smtpd
  64168  502951  74173    103  3    0x100092  kqread        smtpd
  43584  208678  74173     95  3    0x100092  kqread        smtpd
  29331  358342  74173     95  3    0x100092  kqread        smtpd
  27892  326227  74173     95  3    0x100092  kqread        smtpd
  50048  471312  74173     95  3    0x100092  kqread        smtpd
  74173  280421      1      0  3    0x100080  kqread        smtpd
  58592  330686      1      0  3        0x80  select        sshd
  24987  319166      0      0  3     0x14200  acct          acct
  68726  435836  97080      0  3        0x80  netio         npppd
  97080  444033      1     82  3        0x90  kqread        npppd
  38981  130894  75869     68  3        0x90  select        isakmpd
  75869  299535      1      0  3        0x80  netio         isakmpd
  16664  374693      1      0  3    0x100080  poll          ntpd
    983  473943  54143     83  3    0x100092  poll          ntpd
  54143   28602      1     83  3    0x100092  poll          ntpd
  49654  431364  21669     97  3    0x100090  kqread        nsd
  21669  430208  87884     97  3    0x100090  poll          nsd
  87884  232099      1     97  3    0x100090  kqread        nsd
  82601  373153  88489     74  3    0x100092  bpf           pflogd
  88489  516126      1      0  3        0x80  netio         pflogd
  40723  395790  80611     73  3    0x100090  kqread        syslogd
  80611  145211      1      0  3    0x100082  netio         syslogd
*67790  469986      1    577  7        0x10                openvpn
  13292  472427  56656    115  3    0x100092  kqread        slaacd
  94723  226910  56656    115  3    0x100092  kqread        slaacd
  56656   69013      1      0  3        0x80  kqread        slaacd
  59597  418711      0      0  3     0x14200  pgzero        zerothread
  32188  123614      0      0  3     0x14200  aiodoned      aiodoned
  30296  340515      0      0  3     0x14200  syncer        update
  87132  425856      0      0  3     0x14200  cleaner       cleaner
  49677  159572      0      0  3     0x14200  reaper        reaper
  20752  498677      0      0  3     0x14200  pgdaemon      pagedaemon
   5834  370089      0      0  3     0x14200  bored         crynlk
  11178  143871      0      0  3     0x14200  bored         crypto
   5616  340057      0      0  3     0x14200  usbtsk        usbtask
  65664   15603      0      0  3     0x14200  usbatsk       usbatsk
  38587   15694      0      0  2     0x14200                softnet
  31736  170093      0      0  3     0x14200  bored         systqmp
  94270  499526      0      0  3     0x14200  bored         systq
  45709  333779      0      0  2  0x40014200                softclock
  17418  142055      0      0  3  0x40014200                idle0
  21082   32581      0      0  3     0x14200  kmalloc       kmthread
      1  118942      0      0  3        0x82  wait          init
      0       0     -1      0  3     0x10200  scheduler     swapper

ddb> show registers
ds                  0x10
es                  0x10
fs                  0x20
gs                     0
edi           0xd0b60f00        wi_card_ident+0xaf4
esi                0x100
ebp           0xf528d834
ebx           0xf528d85c
edx                  0x5
ecx                    0
eax                  0x1
eip           0xd0429194        db_enter+0x4
cs                   0x8
eflags          0x200202        start_phys+0x12e
esp           0xf528d834
ss                  0x10
db_enter+0x4:   popl    %ebp

ddb> show mbuf
mbuf 0xd0429194
m_type: 2125    m_flags: c985<M_EXT,M_EOR,M_ACAST,M_BCAST,M_AUTH,M_COMP,M_LINK0>
m_next: 0xccccc35d      m_nextpkt: 0xcccccccc
m_data: 0xcccccccc      m_len: 2347075925
m_dat: 0xd04291ac       m_pktdat: 0xd04291e8
m_ext.ext_buf: 0x10458b0c       m_ext.ext_size: 3339351296
m_ext.ext_free_fn: 3502170248   m_ext.ext_arg: 0x3d83008b
m_ext.ext_nextref: 0xbe8bc405   m_ext.ext_prevref: 0xd0

ddb> trace /p 0t469986
uvm_fault(0xd20245f0, 0x0, 0, 1) -> e
kernel: page fault trap, code=0
Faulted in DDB; continuing...

dmesg:

OpenBSD 6.2-current (GENERIC) #403: Wed Feb 21 21:38:13 MST 2018
     [hidden email]:/usr/src/sys/arch/i386/compile/GENERIC
cpu0: Geode(TM) Integrated Processor by AMD PCS ("AuthenticAMD"
586-class) 500 MHz
cpu0: FPU,DE,PSE,TSC,MSR,CX8,SEP,PGE,CMOV,CFLUSH,MMX,MMXX,3DNOW2,3DNOW
real mem  = 536363008 (511MB)
avail mem = 512651264 (488MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: date 20/71/05, BIOS32 rev. 0 @ 0xfac40
pcibios0 at bios0: rev 2.0 @ 0xf0000/0x10000
pcibios0: pcibios_get_intr_routing - function not supported
pcibios0: PCI IRQ Routing information unavailable.
pcibios0: PCI bus #0 is the last bus
bios0: ROM list: 0xc8000/0xa800
cpu0 at mainbus0: (uniprocessor)
mtrr: K6-family MTRR support (2 registers)
amdmsr0 at mainbus0
pci0 at mainbus0 bus 0: configuration mode 1 (no bios)
0:20:0: io address conflict 0x6100/0x100
0:20:0: io address conflict 0x6200/0x200
pchb0 at pci0 dev 1 function 0 "AMD Geode LX" rev 0x31
glxsb0 at pci0 dev 1 function 2 "AMD Geode LX Crypto" rev 0x00: RNG AES
vr0 at pci0 dev 6 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 11,
address 00:00:24:c9:58:4c
ukphy0 at vr0 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
vr1 at pci0 dev 7 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 5,
address 00:00:24:c9:58:4d
ukphy1 at vr1 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
vr2 at pci0 dev 8 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 9,
address 00:00:24:c9:58:4e
ukphy2 at vr2 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
vr3 at pci0 dev 9 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 12,
address 00:00:24:c9:58:4f
ukphy3 at vr3 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
ral0 at pci0 dev 17 function 0 "Ralink RT2561S" rev 0x00: irq 15,
address 00:12:0e:61:7f:b0
ral0: MAC/BBP RT2561C, RF RT5225
glxpcib0 at pci0 dev 20 function 0 "AMD CS5536 ISA" rev 0x03: rev 3,
32-bit 3579545Hz timer, watchdog, gpio, i2c
gpio0 at glxpcib0: 32 pins
iic0 at glxpcib0
pciide0 at pci0 dev 20 function 2 "AMD CS5536 IDE" rev 0x01: DMA,
channel 0 wired to compatibility, channel 1 wired to compatibility
wd0 at pciide0 channel 0 drive 0: <SanDisk SDCFX4-8192>
wd0: 4-sector PIO, LBA, 7815MB, 16007040 sectors
wd0(pciide0:0:0): using PIO mode 4, Ultra-DMA mode 2
pciide0: channel 1 ignored (disabled)
ohci0 at pci0 dev 21 function 0 "AMD CS5536 USB" rev 0x02: irq 7,
version 1.0, legacy support
ehci0 at pci0 dev 21 function 1 "AMD CS5536 USB" rev 0x02: irq 7
usb0 at ehci0: USB revision 2.0
uhub0 at usb0 configuration 1 interface 0 "AMD EHCI root hub" rev
2.00/1.00 addr 1
isa0 at glxpcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com0: console
com1 at isa0 port 0x2f8/8 irq 3: ns16550a, 16 byte fifo
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbc0: unable to establish interrupt for irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
nsclpcsio0 at isa0 port 0x2e/2: NSC PC87366 rev 10: GPIO VLM TMS
gpio1 at nsclpcsio0: 29 pins
npx0 at isa0 port 0xf0/16: reported by CPUID; using exception 16
usb1 at ohci0: USB revision 1.0
uhub1 at usb1 configuration 1 interface 0 "AMD OHCI root hub" rev
1.00/1.00 addr 1
vscsi0 at root
scsibus1 at vscsi0: 256 targets
softraid0 at root
scsibus2 at softraid0: 256 targets
root on wd0a (ba730608caf94ae4.a) swap on wd0b dump on wd0b
WARNING: / was not properly unmounted
carp1: state transition: BACKUP -> MASTER
carp2: state transition: BACKUP -> MASTER
carp3: state transition: BACKUP -> MASTER
carp0: state transition: BACKUP -> MASTER

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "skrev->reverse == NULL"

Alexander Bluhm
On Sat, Feb 24, 2018 at 04:20:50PM -0500, Johan Huldtgren wrote:
> trying to connect to my gateway today I found the following
> panic. This is 100% reproducible anytime I connect via
> openvpn and then generate traffic. This first happened on
> the Feb 7th snap, I updated and it happens on the latest
> snap as well.

The question is what you used before.  I rewrote the
code on 2017/12/29.  Was your working version from before that?

Although a bit different, a similar assertion was there before.

> ip_output(d2574200,0,f5275000,1,0,0,0) at ip_output+0x649
> ip_forward(d2574200,d24b8000,d1f8aed8,0) at ip_forward+0x20a

> ddb> show mbuf
> mbuf 0xd05032f4

When you type "show mbuf" you must give the address of the mbuf.
The functions ip_output() and ip_forward() pass it as the first
argument.  So "show mbuf 0xd2574200" would produce reasonable
results; your command dumped arbitrary memory.

> panic: kernel diagnostic assertion "skrev->reverse == NULL" failed: file
> "/usr/src/sys/net/pf.c", line 7277
> pf_find_state(d2486f00,f528d8e0,2,d251b900) at pf_find_state+0x28d

This function calls pf_state_key_link_reverse(sk, pkt_sk) with
pkt_sk->reverse != NULL.

On the way to that call we went through
                pkt_sk = m->m_pkthdr.pf.statekey;
                if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
                        sk = pkt_sk->reverse;

We know that pkt_sk != NULL and pkt_sk->reverse != NULL and before
doing the RB_FIND() lookup we check that sk == NULL.  So
pf_state_key_isvalid(pkt_sk->reverse) must be false.

The kernel tried to use an invalid statekey.  How can that happen?
Invalid means sk->removed == 1, but pf_state_key_detach() also calls
pf_state_key_unlink_reverse().

> pf_test(2,3,d247d400,f528da64) at pf_test+0xb63
> ip_output(d251b900,0,f528dad0,1,0,0,0) at ip_output+0x649

Here we find the outgoing state.  The mbuf had a statekey before.

> ip_forward(d251b900,d247d400,d201cb48,0) at ip_forward+0x20a
> ip_input_if(f528dc64,f528dc50,4,0,d247d400) at ip_input_if+0x48e
> ipv4_input(d247d400,d251b900) at ipv4_input+0x2b

Here pf attaches the incoming statekey to the mbuf.  This is the
one with the invalid reverse.

> tun_dev_write(d247d400,f528dd98,10001) at tun_dev_write+0x222
> tunwrite(2800,f528dd98,11) at tunwrite+0x53

What does your pf config look like?  Do you have some skip on tun?
Was there unencrypted traffic before you enabled openvpn?  Were
there matching pf states before you enabled openvpn?  Does it
immediately crash when you start openvpn and the first packet is
sent out?  Do you only use the tun interface and the outgoing
interface?  Do you have more forwarding paths?  Do you use floating
states?  Does the problem go away with if-bound states?  Is
there more stuff involved like gif(4) or bridge(4) or ....

Although I do not fully understand what the root of the problem is,
you can try this diff.  Does it prevent the panic?  Do you see the
log message I have added there?  This would at least prove that my
theory is correct.

bluhm

Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1061
diff -u -p -r1.1061 pf.c
--- net/pf.c 18 Feb 2018 21:45:30 -0000 1.1061
+++ net/pf.c 26 Feb 2018 00:27:57 -0000
@@ -1070,8 +1070,20 @@ pf_find_state(struct pfi_kif *kif, struc
  pkt_sk = NULL;
  }
 
- if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
- sk = pkt_sk->reverse;
+ if (pkt_sk) {
+ if (pf_state_key_isvalid(pkt_sk->reverse)) {
+ sk = pkt_sk->reverse;
+ } else if (pkt_sk->reverse != NULL) {
+ log(LOG_ERR,
+    "pf: state key reverse invalid. "
+    "pkt_sk=%p, pkt_sk->reverse=%p, "
+    "pkt_sk->reverse->reverse=%p\n",
+    pkt_sk, pkt_sk->reverse,
+    pkt_sk->reverse->reverse);
+ pf_mbuf_unlink_state_key(m);
+ pkt_sk = NULL;
+ }
+ }
 
  if (pkt_sk == NULL) {
  /* here we deal with local outbound packet */

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "skrev->reverse == NULL"

Johan Huldtgren-3
On 2018-02-25 21:54, Alexander Bluhm wrote:
> On Sat, Feb 24, 2018 at 04:20:50PM -0500, Johan Huldtgren wrote:
>> trying to connect to my gateway today I found the following
>> panic. This is 100% reproducible anytime I connect via
>> openvpn and then generate traffic. This first happened on
>> the Feb 7th snap, I updated and it happens on the latest
>> snap as well.
>
> The question is what you used before.  I rewrote the
> code on 2017/12/29.  Was your working version from before that?

Looking back at my logs it looks like the last time I used it was
January 20th, and the snap I had then was:

OpenBSD 6.2-current (GENERIC) #316: Sat Dec 23 11:39:17 MST 2017
     [hidden email]:/usr/src/sys/arch/i386/compile/GENERIC

> Although a bit different, a similar assertion was there before.
>
>> ip_output(d2574200,0,f5275000,1,0,0,0) at ip_output+0x649
>> ip_forward(d2574200,d24b8000,d1f8aed8,0) at ip_forward+0x20a
>
>> ddb> show mbuf
>> mbuf 0xd05032f4
>
> When you type "show mbuf" you must give the address of the mbuf.
> The functions ip_output() and ip_forward() pass it as the first
> argument.  So "show mbuf 0xd2574200" would produce reasonable
> results; your command dumped arbitrary memory.

thanks, noted for the future.

>> panic: kernel diagnostic assertion "skrev->reverse == NULL" failed:
>> file
>> "/usr/src/sys/net/pf.c", line 7277
>> pf_find_state(d2486f00,f528d8e0,2,d251b900) at pf_find_state+0x28d
>
> This function calls pf_state_key_link_reverse(sk, pkt_sk) with
> pkt_sk->reverse != NULL.
>
> On the way to that call we went through
> pkt_sk = m->m_pkthdr.pf.statekey;
>                 if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
>                         sk = pkt_sk->reverse;
>
> We know that pkt_sk != NULL and pkt_sk->reverse != NULL and before
> doing the RB_FIND() lookup we check that sk == NULL.  So
> pf_state_key_isvalid(pkt_sk->reverse) must be false.
>
> The kernel tried to use an invalid statekey.  How can that happen?
> Invalid means sk->removed == 1, but pf_state_key_detach() also calls
> pf_state_key_unlink_reverse().
>
>> pf_test(2,3,d247d400,f528da64) at pf_test+0xb63
>> ip_output(d251b900,0,f528dad0,1,0,0,0) at ip_output+0x649
>
> Here we find the outgoing state.  The mbuf had a statekey before.
>
>> ip_forward(d251b900,d247d400,d201cb48,0) at ip_forward+0x20a
>> ip_input_if(f528dc64,f528dc50,4,0,d247d400) at ip_input_if+0x48e
>> ipv4_input(d247d400,d251b900) at ipv4_input+0x2b
>
> Here pf attaches the incoming statekey to the mbuf.  This is the
> one with the invalid reverse.
>
>> tun_dev_write(d247d400,f528dd98,10001) at tun_dev_write+0x222
>> tunwrite(2800,f528dd98,11) at tunwrite+0x53
>
> How does you pf config look like?  Do you have some skip on tun?

The only thing I have skip on is lo

> Was there unencrpyted traffic before you enabled openvpn?

I assume so, this is my firewall there is always some traffic
but tun0 is only used for openvpn

> Were there matching pf states before you enabled openvpn?

I'm not exactly sure what you're asking.  As this host
does more than openvpn, there would have been other states,
but I'm not sure if I can find that now.

> Does it immediately crash when you start openvpn and the first
> packet is sent out.

It seems so.  I can connect, and if I then (for example) ping a
host on the inside, I get one (the most I've ever seen is three)
ping back and then nothing. At that point I can also see that
the host has failed over to its carp partner.

> Do you only use the tun interface and the outgoing
> interface?

No, here is the output of ifconfig, there are several
interfaces / vlans,

# ifconfig
lo0: flags=8049<UP,LOOPBACK,RUNNING,MULTICAST> mtu 32768
         index 6 priority 0 llprio 3
         groups: lo
         inet6 ::1 prefixlen 128
         inet6 fe80::1%lo0 prefixlen 64 scopeid 0x6
         inet 127.0.0.1 netmask 0xff000000
vr0: flags=8b43<UP,BROADCAST,RUNNING,PROMISC,ALLMULTI,SIMPLEX,MULTICAST>
mtu 1500
         lladdr 00:00:24:c9:58:4c
         index 1 priority 0 llprio 3
         groups: egress
         media: Ethernet autoselect (100baseTX full-duplex)
         status: active
         inet 172.16.0.2 netmask 0xffffff00 broadcast 172.16.0.255
vr1: flags=8b43<UP,BROADCAST,RUNNING,PROMISC,ALLMULTI,SIMPLEX,MULTICAST>
mtu 1500
         lladdr 00:00:24:c9:58:4d
         index 2 priority 0 llprio 3
         media: Ethernet autoselect (100baseTX full-duplex)
         status: active
vr2: flags=8b43<UP,BROADCAST,RUNNING,PROMISC,ALLMULTI,SIMPLEX,MULTICAST>
mtu 1500
         lladdr 00:00:24:c9:58:4e
         index 3 priority 0 llprio 3
         media: Ethernet autoselect (100baseTX full-duplex)
         status: active
vr3: flags=8b43<UP,BROADCAST,RUNNING,PROMISC,ALLMULTI,SIMPLEX,MULTICAST>
mtu 1500
         lladdr 00:00:24:c9:58:4f
         index 4 priority 0 llprio 3
         media: Ethernet autoselect (100baseTX full-duplex)
         status: active
enc0: flags=0<>
         index 5 priority 0 llprio 3
         groups: enc
         status: active
ral0: flags=8802<BROADCAST,SIMPLEX,MULTICAST> mtu 1500
         lladdr 00:12:0e:61:7f:b0
         index 7 priority 4 llprio 3
         groups: wlan
         media: IEEE802.11 autoselect
         status: no network
         ieee80211: nwid ""
carp0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
         lladdr 00:00:5e:00:01:01
         index 8 priority 15 llprio 3
         carp: MASTER carpdev vr0 vhid 1 advbase 1 advskew 0 carppeer
172.16.0.3
         groups: carp
         status: master
         inet 172.16.0.103 netmask 0xffffff00 broadcast 172.16.0.255
carp1: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
         lladdr 00:00:5e:00:01:02
         index 9 priority 15 llprio 3
         carp: MASTER carpdev vlan20 vhid 2 advbase 1 advskew 0 carppeer
192.168.100.3
         groups: carp
         status: master
         inet 192.168.100.103 netmask 0xffffff00 broadcast
192.168.100.255
carp2: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
         lladdr 00:00:5e:00:01:03
         index 10 priority 15 llprio 3
         carp: MASTER carpdev vlan30 vhid 3 advbase 1 advskew 0 carppeer
192.168.0.3
         groups: carp
         status: master
         inet 192.168.0.103 netmask 0xffffff00 broadcast 192.168.0.255
carp3: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
         lladdr 00:00:5e:00:01:04
         index 11 priority 15 llprio 3
         carp: MASTER carpdev vlan666 vhid 4 advbase 1 advskew 0 carppeer
10.66.66.3
         groups: carp
         status: master
         inet 10.66.66.103 netmask 0xffffff00 broadcast 10.66.66.255
pflow0: flags=41<UP,RUNNING> mtu 1448
         index 12 priority 0 llprio 3
         pflow: sender: 192.168.100.2 receiver: 192.168.100.8:9995
version: 10
         groups: pflow
pfsync0: flags=41<UP,RUNNING> mtu 1500
         index 13 priority 0 llprio 3
         pfsync: syncdev: vlan666 syncpeer: 10.66.66.3 maxupd: 128 defer:
off
         groups: carp pfsync
tun0: flags=8051<UP,POINTOPOINT,RUNNING,MULTICAST> mtu 1500
         index 14 priority 0 llprio 3
         groups: tun
         status: active
         inet 10.6.6.1 --> 10.6.6.2 netmask 0xffffffff
vlan20: flags=8943<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> mtu
1500
         lladdr 00:00:24:c9:58:4d
         index 15 priority 0 llprio 3
         encap: vnetid 20 parent vr1
         groups: vlan
         media: Ethernet autoselect (100baseTX full-duplex)
         status: active
         inet 192.168.100.2 netmask 0xffffff00 broadcast 192.168.100.255
vlan30: flags=8943<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> mtu
1500
         lladdr 00:00:24:c9:58:4e
         index 16 priority 0 llprio 3
         encap: vnetid 30 parent vr2
         groups: vlan
         media: Ethernet autoselect (100baseTX full-duplex)
         status: active
         inet 192.168.0.2 netmask 0xffffff00 broadcast 192.168.0.255
vlan666: flags=8943<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> mtu
1500
         lladdr 00:00:24:c9:58:4f
         index 17 priority 0 llprio 3
         encap: vnetid 666 parent vr3
         groups: vlan
         media: Ethernet autoselect (100baseTX full-duplex)
         status: active
         inet 10.66.66.2 netmask 0xffffff00 broadcast 10.66.66.255
pflog0: flags=141<UP,RUNNING,PROMISC> mtu 33172
         index 18 priority 0 llprio 3
         groups: pflog

> Do you have more forwarding paths?

I don't know, how would I check?

> Do you use floating states?

pf.conf(5) says this is the default and I've not
changed it.

> Does the problem go away with if-bound states?

No, I set this in pf.conf

set state-policy if-bound

and tried, and it still panicked.

> Is there more stuff involved like gif(4) or bridge(4) or ....

neither gif nor bridge, but vlan and carp.

> Although I do not fully understand what is the root of the problem,
> you can try this diff.  Does it prevent the panic?  Do you see the
> log message I have added there?  This would at least prove that my
> theory is correct.

I am traveling (which is why I was trying to use my vpn), and unfortunately
my internet here is atrocious; just reading and writing email is
very hard. Once I get somewhere with a better connection I'll get this
tested and report back.

thanks,

.jh

> bluhm
>
> Index: net/pf.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> retrieving revision 1.1061
> diff -u -p -r1.1061 pf.c
> --- net/pf.c 18 Feb 2018 21:45:30 -0000 1.1061
> +++ net/pf.c 26 Feb 2018 00:27:57 -0000
> @@ -1070,8 +1070,20 @@ pf_find_state(struct pfi_kif *kif, struc
>   pkt_sk = NULL;
>   }
>
> - if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
> - sk = pkt_sk->reverse;
> + if (pkt_sk) {
> + if (pf_state_key_isvalid(pkt_sk->reverse)) {
> + sk = pkt_sk->reverse;
> + } else if (pkt_sk->reverse != NULL) {
> + log(LOG_ERR,
> +    "pf: state key reverse invalid. "
> +    "pkt_sk=%p, pkt_sk->reverse=%p, "
> +    "pkt_sk->reverse->reverse=%p\n",
> +    pkt_sk, pkt_sk->reverse,
> +    pkt_sk->reverse->reverse);
> + pf_mbuf_unlink_state_key(m);
> + pkt_sk = NULL;
> + }
> + }
>
>   if (pkt_sk == NULL) {
>   /* here we deal with local outbound packet */

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "skrev->reverse == NULL"

Johan Huldtgren-3
On 2018-02-25 22:48, Johan Huldtgren wrote:

> On 2018-02-25 21:54, Alexander Bluhm wrote:
>> On Sat, Feb 24, 2018 at 04:20:50PM -0500, Johan Huldtgren wrote:
>>> trying to connect to my gateway today I found the following
>>> panic. This is 100% reproducible anytime I connect via
>>> openvpn and then generate traffic. This first happened on
>>> the Feb 7th snap, I updated and it happens on the latest
>>> snap as well.
>>
>> The questions is what have you used before.  I have rewritten the
>> code at 2017/12/29.  Was your working version before that?
>
> Looking back at my logs it looks like the last time I used it was
> January 20th, and the snap I had then was:
>
> OpenBSD 6.2-current (GENERIC) #316: Sat Dec 23 11:39:17 MST 2017
>     [hidden email]:/usr/src/sys/arch/i386/compile/GENERIC
>
>> Although a bit different, a simmilar assertion was there before.
>>
>>> ip_output(d2574200,0,f5275000,1,0,0,0) at ip_output+0x649
>>> ip_forward(d2574200,d24b8000,d1f8aed8,0) at ip_forward+0x20a
>>
>>> ddb> show mbuf
>>> mbuf 0xd05032f4
>>
>> When you type "show mbuf" you must give the address of the mbuf.
>> The functions ip_output() and ip_forward() pass it at the first
>> argument.  So "show mbuf 0xd2574200" would produces reasonable
>> results, your command dumped arbitrary memory.
>
> thanks, noted for the future.
>
>>> panic: kernel diagnostic assertion "skrev->reverse == NULL" failed:
>>> file
>>> "/usr/src/sys/net/pf.c", line 7277
>>> pf_find_state(d2486f00,f528d8e0,2,d251b900) at pf_find_state+0x28d
>>
>> This functions calls pf_state_key_link_reverse(sk, pkt_sk) with
>> pkt_sk->reverse != NULL.
>>
>> On the way to that call we went through
>> pkt_sk = m->m_pkthdr.pf.statekey;
>>                 if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
>>                         sk = pkt_sk->reverse;
>>
>> We know that pkt_sk != NULL and pkt_sk->reverse != NULL and before
>> doing the RB_FIND() lookup we check that sk == NULL.  So
>> pf_state_key_isvalid(pkt_sk->reverse) must be false.
>>
>> The kernel tried to use an invalid statekey.  How can that happen?
>> Invalid means sk->removed == 1, but pf_state_key_detach() also calls
>> pf_state_key_unlink_reverse().
>>
>>> pf_test(2,3,d247d400,f528da64) at pf_test+0xb63
>>> ip_output(d251b900,0,f528dad0,1,0,0,0) at ip_output+0x649
>>
>> Here we find the outgoing state.  The mbuf had a statekey before.
>>
>>> ip_forward(d251b900,d247d400,d201cb48,0) at ip_forward+0x20a
>>> ip_input_if(f528dc64,f528dc50,4,0,d247d400) at ip_input_if+0x48e
>>> ipv4_input(d247d400,d251b900) at ipv4_input+0x2b
>>
>> Here pf attaches the incoming statekey to the mbuf.  This is the
>> one with the invalid reverse.
>>
>>> tun_dev_write(d247d400,f528dd98,10001) at tun_dev_write+0x222
>>> tunwrite(2800,f528dd98,11) at tunwrite+0x53
>>
>> How does you pf config look like?  Do you have some skip on tun?
>
> The only thing I have skip on is lo
>
>> Was there unencrpyted traffic before you enabled openvpn?
>
> I assume so, this is my firewall there is always some traffic
> but tun0 is only used for openvpn
>
>> Were there matching pf states before you enabled openvpn?
>
> I'm not sure exactly sure what you're asking, as this host
> does more than openvpn there would have been other states,
> not sure if I can find that now though?
>
>> Does it immediately crash when you start openvpn and the first
>> packet is sent out.
>
> It seems so I can connect and if I then (for example) ping a
> host on the inside, I get one (most I've ever seen is three)
> ping back and then nothing. At that point I can also see that
> the host has failed over to it's carp partner.
>
>> Do you only use the tun interface and the outgoing
>> interface?
>
> No, here is the output of ifconfig, there are several
> interfaces / vlans,
>
> # ifconfig
> lo0: flags=8049<UP,LOOPBACK,RUNNING,MULTICAST> mtu 32768
>         index 6 priority 0 llprio 3
>         groups: lo
>         inet6 ::1 prefixlen 128
>         inet6 fe80::1%lo0 prefixlen 64 scopeid 0x6
>         inet 127.0.0.1 netmask 0xff000000
> vr0:
> flags=8b43<UP,BROADCAST,RUNNING,PROMISC,ALLMULTI,SIMPLEX,MULTICAST>
> mtu 1500
>         lladdr 00:00:24:c9:58:4c
>         index 1 priority 0 llprio 3
>         groups: egress
>         media: Ethernet autoselect (100baseTX full-duplex)
>         status: active
>         inet 172.16.0.2 netmask 0xffffff00 broadcast 172.16.0.255
> vr1:
> flags=8b43<UP,BROADCAST,RUNNING,PROMISC,ALLMULTI,SIMPLEX,MULTICAST>
> mtu 1500
>         lladdr 00:00:24:c9:58:4d
>         index 2 priority 0 llprio 3
>         media: Ethernet autoselect (100baseTX full-duplex)
>         status: active
> vr2:
> flags=8b43<UP,BROADCAST,RUNNING,PROMISC,ALLMULTI,SIMPLEX,MULTICAST>
> mtu 1500
>         lladdr 00:00:24:c9:58:4e
>         index 3 priority 0 llprio 3
>         media: Ethernet autoselect (100baseTX full-duplex)
>         status: active
> vr3:
> flags=8b43<UP,BROADCAST,RUNNING,PROMISC,ALLMULTI,SIMPLEX,MULTICAST>
> mtu 1500
>         lladdr 00:00:24:c9:58:4f
>         index 4 priority 0 llprio 3
>         media: Ethernet autoselect (100baseTX full-duplex)
>         status: active
> enc0: flags=0<>
>         index 5 priority 0 llprio 3
>         groups: enc
>         status: active
> ral0: flags=8802<BROADCAST,SIMPLEX,MULTICAST> mtu 1500
>         lladdr 00:12:0e:61:7f:b0
>         index 7 priority 4 llprio 3
>         groups: wlan
>         media: IEEE802.11 autoselect
>         status: no network
>         ieee80211: nwid ""
> carp0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
>         lladdr 00:00:5e:00:01:01
>         index 8 priority 15 llprio 3
>         carp: MASTER carpdev vr0 vhid 1 advbase 1 advskew 0 carppeer
> 172.16.0.3
>         groups: carp
>         status: master
>         inet 172.16.0.103 netmask 0xffffff00 broadcast 172.16.0.255
> carp1: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
>         lladdr 00:00:5e:00:01:02
>         index 9 priority 15 llprio 3
>         carp: MASTER carpdev vlan20 vhid 2 advbase 1 advskew 0
> carppeer 192.168.100.3
>         groups: carp
>         status: master
>         inet 192.168.100.103 netmask 0xffffff00 broadcast
> 192.168.100.255
> carp2: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
>         lladdr 00:00:5e:00:01:03
>         index 10 priority 15 llprio 3
>         carp: MASTER carpdev vlan30 vhid 3 advbase 1 advskew 0
> carppeer 192.168.0.3
>         groups: carp
>         status: master
>         inet 192.168.0.103 netmask 0xffffff00 broadcast 192.168.0.255
> carp3: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500
>         lladdr 00:00:5e:00:01:04
>         index 11 priority 15 llprio 3
>         carp: MASTER carpdev vlan666 vhid 4 advbase 1 advskew 0
> carppeer 10.66.66.3
>         groups: carp
>         status: master
>         inet 10.66.66.103 netmask 0xffffff00 broadcast 10.66.66.255
> pflow0: flags=41<UP,RUNNING> mtu 1448
>         index 12 priority 0 llprio 3
>         pflow: sender: 192.168.100.2 receiver: 192.168.100.8:9995
> version: 10
>         groups: pflow
> pfsync0: flags=41<UP,RUNNING> mtu 1500
>         index 13 priority 0 llprio 3
>         pfsync: syncdev: vlan666 syncpeer: 10.66.66.3 maxupd: 128
> defer: off
>         groups: carp pfsync
> tun0: flags=8051<UP,POINTOPOINT,RUNNING,MULTICAST> mtu 1500
>         index 14 priority 0 llprio 3
>         groups: tun
>         status: active
>         inet 10.6.6.1 --> 10.6.6.2 netmask 0xffffffff
> vlan20: flags=8943<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> mtu
> 1500
>         lladdr 00:00:24:c9:58:4d
>         index 15 priority 0 llprio 3
>         encap: vnetid 20 parent vr1
>         groups: vlan
>         media: Ethernet autoselect (100baseTX full-duplex)
>         status: active
>         inet 192.168.100.2 netmask 0xffffff00 broadcast 192.168.100.255
> vlan30: flags=8943<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> mtu
> 1500
>         lladdr 00:00:24:c9:58:4e
>         index 16 priority 0 llprio 3
>         encap: vnetid 30 parent vr2
>         groups: vlan
>         media: Ethernet autoselect (100baseTX full-duplex)
>         status: active
>         inet 192.168.0.2 netmask 0xffffff00 broadcast 192.168.0.255
> vlan666: flags=8943<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> mtu
> 1500
>         lladdr 00:00:24:c9:58:4f
>         index 17 priority 0 llprio 3
>         encap: vnetid 666 parent vr3
>         groups: vlan
>         media: Ethernet autoselect (100baseTX full-duplex)
>         status: active
>         inet 10.66.66.2 netmask 0xffffff00 broadcast 10.66.66.255
> pflog0: flags=141<UP,RUNNING,PROMISC> mtu 33172
>         index 18 priority 0 llprio 3
>         groups: pflog
>
>> Do you have more forwarding paths?
>
> I don't know, how would I check?
>
>> Do you use floating states?
>
> pf.conf(5) says this is the default and I've not
> changed it.
>
>> Does the problem go away with if-bound states?
>
> No, I set this in pf.conf
>
> set state-policy if-bound
>
> and tried and it still paniced.
>
>> Is there more stuff involved like gif(4) or bridge(4) or ....
>
> neither gif nor bridge, but vlan and carp.
>
>> Although I do not fully understand what is the root of the problem,
>> you can try this diff.  Does it prevent the panic?  Do you see the
>> log message I have added there?  This would at least prove that my
>> theory is correct.
>
> I traveling (which is why I was trying to use my vpn), unfortunately
> my internet here is atrocious, just reading and writing email is
> very hard. Once I get somewhere with better connections I'll get this
> tested and report back.
>
> thanks,
>
> .jh
>
>> bluhm
>>
>> Index: net/pf.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
>> retrieving revision 1.1061
>> diff -u -p -r1.1061 pf.c
>> --- net/pf.c 18 Feb 2018 21:45:30 -0000 1.1061
>> +++ net/pf.c 26 Feb 2018 00:27:57 -0000
>> @@ -1070,8 +1070,20 @@ pf_find_state(struct pfi_kif *kif, struc
>>   pkt_sk = NULL;
>>   }
>>
>> - if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
>> - sk = pkt_sk->reverse;
>> + if (pkt_sk) {
>> + if (pf_state_key_isvalid(pkt_sk->reverse)) {
>> + sk = pkt_sk->reverse;
>> + } else if (pkt_sk->reverse != NULL) {
>> + log(LOG_ERR,
>> +    "pf: state key reverse invalid. "
>> +    "pkt_sk=%p, pkt_sk->reverse=%p, "
>> +    "pkt_sk->reverse->reverse=%p\n",
>> +    pkt_sk, pkt_sk->reverse,
>> +    pkt_sk->reverse->reverse);
>> + pf_mbuf_unlink_state_key(m);
>> + pkt_sk = NULL;
>> + }
>> + }
>>
>>   if (pkt_sk == NULL) {
>>   /* here we deal with local outbound packet */

The patch does not change anything; it panics as soon as I've connected and
I try to ping something on the inside. I couldn't find any logging for
this; I assume it would be in messages or daemon?

panic: kernel diagnostic assertion "skrev->reverse == NULL" failed: file
"/usr/src/sys/net/pf.c", line 7293
Stopped at      db_enter+0x4:   popl    %ebp
     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*426685  67308    577        0x10          0    0  openvpn
db_enter() at db_enter+0x4
panic() at panic+0xcc
__assert(d09c0ca8,d0a924b0,1c7d,d0a92ce7) at __assert+0x19
pf_find_state(d2486f00,f528cb20,2,d24bb700) at pf_find_state+0x2dd
pf_test_state(f528cba0,f528cb90,f528cb9e,0) at pf_test_state+0x104
pf_test(2,3,d247f400,f528cca4) at pf_test+0xb63
ip_output(d24bb700,0,f528cd10,1,0,0,0) at ip_output+0x649
ip_forward(d24bb700,d247f400,d201eab0,0) at ip_forward+0x20a
ip_input_if(f528cea4,f528ce90,4,0,d247f400) at ip_input_if+0x48e
ipv4_input(d247f400,d24bb700) at ipv4_input+0x2b
tun_dev_write(d247f400,f528cfd8,10001) at tun_dev_write+0x222
tunwrite(2800,f528cfd8,11) at tunwrite+0x53
spec_write(f528cf58) at spec_write+0x78
VOP_WRITE(d1f78ad0,f528cfd8,11,d2034f00) at VOP_WRITE+0x48
https://www.openbsd.org/ddb.html describes the minimum info required in
bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb>
ddb> trace
db_enter() at db_enter+0x4
panic() at panic+0xcc
__assert(d09c0ca8,d0a924b0,1c7d,d0a92ce7) at __assert+0x19
pf_find_state(d2486f00,f528cb20,2,d24bb700) at pf_find_state+0x2dd
pf_test_state(f528cba0,f528cb90,f528cb9e,0) at pf_test_state+0x104
pf_test(2,3,d247f400,f528cca4) at pf_test+0xb63
ip_output(d24bb700,0,f528cd10,1,0,0,0) at ip_output+0x649
ip_forward(d24bb700,d247f400,d201eab0,0) at ip_forward+0x20a
ip_input_if(f528cea4,f528ce90,4,0,d247f400) at ip_input_if+0x48e
ipv4_input(d247f400,d24bb700) at ipv4_input+0x2b
tun_dev_write(d247f400,f528cfd8,10001) at tun_dev_write+0x222
tunwrite(2800,f528cfd8,11) at tunwrite+0x53
spec_write(f528cf58) at spec_write+0x78
VOP_WRITE(d1f78ad0,f528cfd8,11,d2034f00) at VOP_WRITE+0x48
vn_write(d200dbe0,d200dbf8,f528cfd8,d2034f00) at vn_write+0xc7
dofilewritev(d1f97008,5,d200dbe0,cf7cac98,2,1,d200dbf8,f528d0b0) at
dofilewrite
v+0x1b1
sys_writev(d1f97008,f528d0b8,f528d0b0) at sys_writev+0x5a
syscall() at syscall+0x1a0
--- syscall (number -813912936) ---
end of kernel
start_phys+0x12e:
ddb>
ddb> show mbuf 0xd24bb700
mbuf 0xd24bb700
m_type: 1       m_flags: 2<M_PKTHDR>
m_next: 0x0     m_nextpkt: 0x0
m_data: 0xd24bb758      m_len: 78
m_dat: 0xd24bb718       m_pktdat: 0xd24bb754
m_ptkhdr.ph_ifidx: 14   m_pkthdr.len: 78
m_ptkhdr.ph_tags: 0x0   m_pkthdr.ph_tagsset: 0
m_pkthdr.ph_flowid: 34655       m_pkthdr.ph_loopcnt: 0
m_pkthdr.csum_flags: 0
m_pkthdr.ether_vtag: 0  m_ptkhdr.ph_rtableid: 0
m_pkthdr.pf.statekey: 0xf52db9b0        m_pkthdr.pf.inp 0x0
m_pkthdr.pf.qid: 0      m_pkthdr.pf.tag: 0
m_pkthdr.pf.flags: 80<PROCESSED>
m_pkthdr.pf.routed: 0   m_pkthdr.pf.prio: 3
ddb>
ddb> ps
    PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
  37494  334977      1    688  3        0x90  kqread        
dnscrypt-proxy
  78569  262057      1    559  3        0x90  select        radiusd
  78569  223907      1    559  3   0x4000090  thrsleep      radiusd
  78569   86461      1    559  3   0x4000090  thrsleep      radiusd
  78569  415163      1    559  3   0x4000090  thrsleep      radiusd
  78569   59253      1    559  3   0x4000090  thrsleep      radiusd
  78569  504707      1    559  3   0x4000090  thrsleep      radiusd
  83384  510810      1     77  3    0x100090  poll          dhcpd
   8001  427162      1     53  3        0x90  kqread        unbound
  83662  263207      1      0  3    0x100083  ttyin         ksh
  29432  315860      1      0  3    0x100098  poll          cron
  97054   64971  31708    623  3        0x90  nanosleep     zabbix_agentd
  14721  110975  31708    623  3        0x90  select        zabbix_agentd
  82957  470661  31708    623  3        0x90  netcon        zabbix_agentd
  61580   96345  31708    623  3        0x90  select        zabbix_agentd
   8157  293731  31708    623  3        0x90  nanosleep     zabbix_agentd
  31708  274566      1    623  3        0x90  wait          zabbix_agentd
  65018  293563      1    535  3        0x90  nanosleep     symon
  75972   80506      1      0  3        0x80  nanosleep     apcupsd
  75972  217074      1      0  3   0x4000088  sigwait       apcupsd
  75972   62068      1      0  3   0x4000080  netcon        apcupsd
  57051  119033      1    629  3        0x90  poll          avahi-daemon
  90815   22042      1    572  3        0x90  poll          dbus-daemon
  25072  502298      1     99  3    0x100090  poll          sndiod
   6111  141612      1    110  3    0x100090  poll          sndiod
  81476  457888      1     79  3    0x100090  kqread        tftpd
  13352  482910      1    109  3        0x90  kqread        ftp-proxy
  17797  258167  17617     95  3    0x100092  kqread        smtpd
  35691   98616  17617    103  3    0x100092  kqread        smtpd
  50891  204907  17617     95  3    0x100092  kqread        smtpd
  22451  235621  17617     95  3    0x100092  kqread        smtpd
  67790  425909  17617     95  3    0x100092  kqread        smtpd
  88648  171631  17617     95  3    0x100092  kqread        smtpd
  17617  127335      1      0  3    0x100080  kqread        smtpd
  48610  186699      1      0  3        0x80  select        sshd
  60394  333487      0      0  3     0x14200  acct          acct
  92161  414025  98730      0  3        0x80  netio         npppd
  98730  513329      1     82  3        0x90  kqread        npppd
  10322  369136  84716     68  3        0x90  select        isakmpd
  84716   67144      1      0  3        0x80  netio         isakmpd
  30734  509637      1      0  3    0x100080  poll          ntpd
   4940  247546  97778     83  3    0x100092  poll          ntpd
  97778  181176      1     83  3    0x100092  poll          ntpd
  31384  156265  25407     97  3    0x100090  kqread        nsd
  25407  467498  65501     97  3    0x100090  poll          nsd
  65501   80344      1     97  3    0x100090  kqread        nsd
  69998  287306  15273     74  3    0x100092  bpf           pflogd
  15273  489651      1      0  3        0x80  netio         pflogd
  95834  160543  78157     73  3    0x100090  kqread        syslogd
  78157   19265      1      0  3    0x100082  netio         syslogd
*67308  426685      1    577  7        0x10                openvpn
  49289  191175  64972    115  3    0x100092  kqread        slaacd
   6865  210520  64972    115  3    0x100092  kqread        slaacd
  64972  472377      1      0  3        0x80  kqread        slaacd
  37453  123053      0      0  3     0x14200  pgzero        zerothread
  32407  300486      0      0  3     0x14200  aiodoned      aiodoned
  77016  486977      0      0  3     0x14200  syncer        update
  48084  405859      0      0  3     0x14200  cleaner       cleaner
  27113  323454      0      0  3     0x14200  reaper        reaper
  76905  448558      0      0  3     0x14200  pgdaemon      pagedaemon
  14677  339690      0      0  3     0x14200  bored         crynlk
  20961  216802      0      0  3     0x14200  bored         crypto
  97734   10367      0      0  3     0x14200  usbtsk        usbtask
  43400  187151      0      0  3     0x14200  usbatsk       usbatsk
  95404  265322      0      0  2     0x14200                softnet
   3895  109165      0      0  3     0x14200  bored         systqmp
  59037   50324      0      0  3     0x14200  bored         systq
  78726  466647      0      0  3  0x40014200  bored         softclock
  45874  438047      0      0  3  0x40014200                idle0
  85719  238669      0      0  3     0x14200  kmalloc       kmthread
      1   85595      0      0  3        0x82  wait          init
      0       0     -1      0  3     0x10200  scheduler     swapper
ddb>
ddb> show registers
ds                  0x10
es                  0x10
fs                  0x20
gs                     0
edi           0xd0a88f24        via8231_routing_cnfg+0x5dc
esi                0x100
ebp           0xf528ca74
ebx           0xf528ca9c
edx                  0x5
ecx                    0
eax                  0x1
eip           0xd06e0984        db_enter+0x4
cs                   0x8
eflags          0x200202        start_phys+0x12e
esp           0xf528ca74
ss                  0x10
db_enter+0x4:   popl    %ebp
ddb>
ddb> trace /p 0t426685
uvm_fault(0xd20264c0, 0x0, 0, 1) -> e
kernel: page fault trap, code=0
Faulted in DDB; continuing...
ddb>

OpenBSD 6.2-current (GENERIC) #0: Tue Feb 27 01:39:17 EST 2018
     [hidden email]:/sys/arch/i386/compile/GENERIC
cpu0: Geode(TM) Integrated Processor by AMD PCS ("AuthenticAMD"
586-class) 500 MHz
cpu0: FPU,DE,PSE,TSC,MSR,CX8,SEP,PGE,CMOV,CFLUSH,MMX,MMXX,3DNOW2,3DNOW
real mem  = 536363008 (511MB)
avail mem = 512655360 (488MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: date 20/71/05, BIOS32 rev. 0 @ 0xfac40
pcibios0 at bios0: rev 2.0 @ 0xf0000/0x10000
pcibios0: pcibios_get_intr_routing - function not supported
pcibios0: PCI IRQ Routing information unavailable.
pcibios0: PCI bus #0 is the last bus
bios0: ROM list: 0xc8000/0xa800
cpu0 at mainbus0: (uniprocessor)
mtrr: K6-family MTRR support (2 registers)
amdmsr0 at mainbus0
pci0 at mainbus0 bus 0: configuration mode 1 (no bios)
0:20:0: io address conflict 0x6100/0x100
0:20:0: io address conflict 0x6200/0x200
pchb0 at pci0 dev 1 function 0 "AMD Geode LX" rev 0x31
glxsb0 at pci0 dev 1 function 2 "AMD Geode LX Crypto" rev 0x00: RNG AES
vr0 at pci0 dev 6 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 11,
address 00:00:24:c9:58:4c
ukphy0 at vr0 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
vr1 at pci0 dev 7 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 5,
address 00:00:24:c9:58:4d
ukphy1 at vr1 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
vr2 at pci0 dev 8 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 9,
address 00:00:24:c9:58:4e
ukphy2 at vr2 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
vr3 at pci0 dev 9 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 12,
address 00:00:24:c9:58:4f
ukphy3 at vr3 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
0x004063, model 0x0034
ral0 at pci0 dev 17 function 0 "Ralink RT2561S" rev 0x00: irq 15,
address 00:12:0e:61:7f:b0
ral0: MAC/BBP RT2561C, RF RT5225
glxpcib0 at pci0 dev 20 function 0 "AMD CS5536 ISA" rev 0x03: rev 3,
32-bit 3579545Hz timer, watchdog, gpio, i2c
gpio0 at glxpcib0: 32 pins
iic0 at glxpcib0
pciide0 at pci0 dev 20 function 2 "AMD CS5536 IDE" rev 0x01: DMA,
channel 0 wired to compatibility, channel 1 wired to compatibility
wd0 at pciide0 channel 0 drive 0: <SanDisk SDCFX4-8192>
wd0: 4-sector PIO, LBA, 7815MB, 16007040 sectors
wd0(pciide0:0:0): using PIO mode 4, Ultra-DMA mode 2
pciide0: channel 1 ignored (disabled)
ohci0 at pci0 dev 21 function 0 "AMD CS5536 USB" rev 0x02: irq 7,
version 1.0, legacy support
ehci0 at pci0 dev 21 function 1 "AMD CS5536 USB" rev 0x02: irq 7
usb0 at ehci0: USB revision 2.0
uhub0 at usb0 configuration 1 interface 0 "AMD EHCI root hub" rev
2.00/1.00 addr 1
isa0 at glxpcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com0: console
com1 at isa0 port 0x2f8/8 irq 3: ns16550a, 16 byte fifo
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbc0: unable to establish interrupt for irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
nsclpcsio0 at isa0 port 0x2e/2: NSC PC87366 rev 10: GPIO VLM TMS
gpio1 at nsclpcsio0: 29 pins
npx0 at isa0 port 0xf0/16: reported by CPUID; using exception 16
usb1 at ohci0: USB revision 1.0
uhub1 at usb1 configuration 1 interface 0 "AMD OHCI root hub" rev
1.00/1.00 addr 1
vscsi0 at root
scsibus1 at vscsi0: 256 targets
softraid0 at root
scsibus2 at softraid0: 256 targets
root on wd0a (ba730608caf94ae4.a) swap on wd0b dump on wd0b
WARNING: / was not properly unmounted
carp0: state transition: BACKUP -> MASTER
carp1: state transition: BACKUP -> MASTER
carp2: state transition: BACKUP -> MASTER
carp3: state transition: BACKUP -> MASTER
carp0: state transition: MASTER -> BACKUP
carp1: state transition: MASTER -> BACKUP
carp2: state transition: MASTER -> BACKUP
carp3: state transition: MASTER -> BACKUP
carp0: state transition: BACKUP -> MASTER
carp1: state transition: BACKUP -> MASTER
carp2: state transition: BACKUP -> MASTER
carp3: state transition: BACKUP -> MASTER

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "skrev->reverse == NULL"

Alexander Bluhm
On Tue, Feb 27, 2018 at 11:15:22AM -0300, Johan Huldtgren wrote:
> Patch does not change anything, it panics as soon as I've connected and
> I try to ping something on the inside. I couldn't find any logging for
> this, I assume it would be in messages or daemon?

When it crashes, there is no log.  syslogd(8) has no chance to write
it; the panic is faster.

I can reproduce it and know how it happens.  When linking the
incoming and outgoing state, the forward and reverse statekeys may
be equal.  This happens when the source and destination addresses are
the same.

nc -u -s 10.188.236.74 -p 12345 10.188.236.74 12345

I guess in your case this is not the ping packet but some other
protocol without port.

I did not consider this possibility when I committed the cleanup.

----------------------------
revision 1.1053
date: 2017/12/29 17:05:25;  author: bluhm;  state: Exp;  lines: +59 -34;  commitid: 75zQrAJWxZAn90Ue;
Make the functions which link the pf state keys to mbufs, inpcbs,
or other states more consistent.
OK visa@ sashan@ on a previous version
----------------------------

So let's add a comment and move the kassert to the beginning where
it was before.

Johan, does this fix your crash?

ok?

bluhm

Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1062
diff -u -p -r1.1062 pf.c
--- net/pf.c 27 Feb 2018 09:24:56 -0000 1.1062
+++ net/pf.c 1 Mar 2018 19:45:07 -0000
@@ -7273,12 +7273,10 @@ pf_inp_unlink(struct inpcb *inp)
 void
 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev)
 {
- /*
- * Assert will not wire as long as we are called by pf_find_state()
- */
+ /* Note that sk and skrev may be equal, then we refcount twice. */
  KASSERT(sk->reverse == NULL);
- sk->reverse = pf_state_key_ref(skrev);
  KASSERT(skrev->reverse == NULL);
+ sk->reverse = pf_state_key_ref(skrev);
  skrev->reverse = pf_state_key_ref(sk);
 }
 
@@ -7386,6 +7384,7 @@ pf_state_key_unlink_reverse(struct pf_st
 {
  struct pf_state_key *skrev = sk->reverse;
 
+ /* Note that sk and skrev may be equal, then we unref twice. */
  if (skrev != NULL) {
  KASSERT(skrev->reverse == sk);
  sk->reverse = NULL;

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "skrev->reverse == NULL"

Johan Huldtgren-3
On 2018-03-01 17:00, Alexander Bluhm wrote:

> On Tue, Feb 27, 2018 at 11:15:22AM -0300, Johan Huldtgren wrote:
>> Patch does not change anything, it panics as soon as I've connected
>> and
>> I try to ping something on the inside. I couldn't find any logging for
>> this, I assume it would be in messages or daemon?
>
> When it crashes, there is no log.  syslogd(8) has no chance to write
> it, the panic is faster.
>
> I can reproduce it and know how it happens.  When linking the
> incoming and outgoing state, the forward and reverse statekey may
> be equal.  This happens when the source and destination address is
> the same.
>
> nc -u -s 10.188.236.74 -p 12345 10.188.236.74 12345
>
> I guess in your case this is not the ping packet but some other
> protocol without port.
>
> I did not consider this possibility when I committed the cleanup.
>
> ----------------------------
> revision 1.1053
> date: 2017/12/29 17:05:25;  author: bluhm;  state: Exp;  lines: +59
> -34;  commitid: 75zQrAJWxZAn90Ue;
> Make the functions which link the pf state keys to mbufs, inpcbs,
> or other states more consistent.
> OK visa@ sashan@ on a previous version
> ----------------------------
>
> So let's add a comment and move the kassert to the beginning where
> it was before.
>
> Johan, does this fix your crash?

yes thank you, I just applied this and I can now connect and work
without any issues (lightly tested for about 20 minutes, but before
it would panic instantaneously, so I'm fairly confident this is
good).

thank you very much.

.jh


> ok?
>
> bluhm
>
> Index: net/pf.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> retrieving revision 1.1062
> diff -u -p -r1.1062 pf.c
> --- net/pf.c 27 Feb 2018 09:24:56 -0000 1.1062
> +++ net/pf.c 1 Mar 2018 19:45:07 -0000
> @@ -7273,12 +7273,10 @@ pf_inp_unlink(struct inpcb *inp)
>  void
>  pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key
> *skrev)
>  {
> - /*
> - * Assert will not wire as long as we are called by pf_find_state()
> - */
> + /* Note that sk and skrev may be equal, then we refcount twice. */
>   KASSERT(sk->reverse == NULL);
> - sk->reverse = pf_state_key_ref(skrev);
>   KASSERT(skrev->reverse == NULL);
> + sk->reverse = pf_state_key_ref(skrev);
>   skrev->reverse = pf_state_key_ref(sk);
>  }
>
> @@ -7386,6 +7384,7 @@ pf_state_key_unlink_reverse(struct pf_st
>  {
>   struct pf_state_key *skrev = sk->reverse;
>
> + /* Note that sk and skrev may be equal, then we unref twice. */
>   if (skrev != NULL) {
>   KASSERT(skrev->reverse == sk);
>   sk->reverse = NULL;