assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
9 messages Options
Reply | Threaded
Open this post in threaded view
|

assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Stuart Henderson
iked box, GENERIC.MP + WITNESS, -current as of Friday 10th:

login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file "/src/cvs-openbsd/sys/kern/uipc_socket2.c", line 310
splassert: soassertlocked: want 1 have 256

panic: spl assertion failure in soassertlocked
Starting stack trace...
Faulted in traceback, aborting...
panic(splassert: if_down: want 1 have 256
panic: spl assertion failure in if_down) at
Faulted in traceback, aborting...
panicsplassert: if_down: want 1 have 256
+0x133panic: spl assertion failure in if_down
Faulted in traceback, aborting...

<repeated a few times>

It's stuck at this point, I can't enter ddb.


OpenBSD 6.2-current (WITNESS) #0: Fri Nov 10 07:54:54 GMT 2017
    [hidden email]:/src/cvs-openbsd/sys/arch/amd64/compile/WITNESS
real mem = 1996152832 (1903MB)
avail mem = 1907134464 (1818MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.7 @ 0x77fb7020 (7 entries)
bios0: vendor coreboot version "88a4f96" date 03/11/2016
bios0: PC Engines apu2
acpi0 at bios0: rev 2
acpi0: sleep states S0 S1 S2 S3 S4 S5
acpi0: tables DSDT FACP SSDT APIC HEST SSDT SSDT HPET
acpi0: wakeup devices PWRB(S4) PBR4(S4) PBR5(S4) PBR6(S4) PBR7(S4) PBR8(S4) UOH1(S3) UOH3(S3) UOH5(S3) XHC0(S4)
acpitimer0 at acpi0: 3579545 Hz, 32 bits
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: AMD GX-412TC SOC, 998.27 MHz
cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE
3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,M
ASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,ITSC,BMI1
cpu0: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB 64b/line 16-way L2 cache
cpu0: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu0: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
acpitimer0: recalibrated TSC frequency 998129941 Hz
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 99MHz
cpu0: mwait min=64, max=64, IBE
cpu1 at mainbus0: apid 1 (application processor)
cpu1: AMD GX-412TC SOC, 998.25 MHz
cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE
3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,M
ASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,ITSC,BMI1
cpu1: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB 64b/line 16-way L2 cache
cpu1: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu1: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 2 (application processor)
cpu2: AMD GX-412TC SOC, 998.13 MHz
cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE
3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,M
ASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,ITSC,BMI1
cpu2: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB 64b/line 16-way L2 cache
cpu2: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu2: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu2: smt 0, core 2, package 0
cpu3 at mainbus0: apid 3 (application processor)
cpu3: AMD GX-412TC SOC, 998.14 MHz
cpu3: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE
3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,M
ASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,ITSC,BMI1
cpu3: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB 64b/line 16-way L2 cache
cpu3: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu3: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu3: smt 0, core 3, package 0
ioapic0 at mainbus0: apid 4 pa 0xfec00000, version 21, 24 pins
ioapic1 at mainbus0: apid 5 pa 0xfec20000, version 21, 32 pins
, remapped to apid 5
acpihpet0 at acpi0: 14318180 Hz
acpihpet0: recalibrated TSC frequency 998133658 Hz
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus -1 (PBR4)
acpiprt2 at acpi0: bus 1 (PBR5)
acpiprt3 at acpi0: bus 2 (PBR6)
acpiprt4 at acpi0: bus 3 (PBR7)
acpiprt5 at acpi0: bus -1 (PBR8)
acpicpu0 at acpi0: C2(0@400 io@0x1771), C1(@1 halt!), PSS
acpicpu1 at acpi0: C2(0@400 io@0x1771), C1(@1 halt!), PSS
acpicpu2 at acpi0: C2(0@400 io@0x1771), C1(@1 halt!), PSS
acpicpu3 at acpi0: C2(0@400 io@0x1771), C1(@1 halt!), PSS
acpibtn0 at acpi0: PWRB
cpu0: 998 MHz: speeds: 1000 800 600 MHz
pci0 at mainbus0 bus 0
pchb0 at pci0 dev 0 function 0 "AMD AMD64 16h Root Complex" rev 0x00
pchb1 at pci0 dev 2 function 0 "AMD AMD64 16h Host" rev 0x00
ppb0 at pci0 dev 2 function 2 "AMD AMD64 16h PCIE" rev 0x00: msi
pci1 at ppb0 bus 1
em0 at pci1 dev 0 function 0 "Intel I211" rev 0x03: msi, address 00:0d:b9:41:4c:e0
ppb1 at pci0 dev 2 function 3 "AMD AMD64 16h PCIE" rev 0x00: msi
pci2 at ppb1 bus 2
em1 at pci2 dev 0 function 0 "Intel I211" rev 0x03: msi, address 00:0d:b9:41:4c:e1
ppb2 at pci0 dev 2 function 4 "AMD AMD64 16h PCIE" rev 0x00: msi
pci3 at ppb2 bus 3
em2 at pci3 dev 0 function 0 "Intel I211" rev 0x03: msi, address 00:0d:b9:41:4c:e2
"AMD CCP" rev 0x00 at pci0 dev 8 function 0 not configured
xhci0 at pci0 dev 16 function 0 "AMD Bolton xHCI" rev 0x11: msi
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1
ahci0 at pci0 dev 17 function 0 "AMD Hudson-2 SATA" rev 0x40: apic 4 int 19, AHCI 1.3
ahci0: port 0: 6.0Gb/s
scsibus1 at ahci0: 32 targets
sd0 at scsibus1 targ 0 lun 0: <ATA, SATA SSD, S9FM> SCSI3 0/direct fixed t10.ATA_SATA_SSD_DE35075803EE01965317
sd0: 15272MB, 512 bytes/sector, 31277232 sectors, thin
ehci0 at pci0 dev 19 function 0 "AMD Hudson-2 USB2" rev 0x39: apic 4 int 18
usb1 at ehci0: USB revision 2.0
uhub1 at usb1 configuration 1 interface 0 "AMD EHCI root hub" rev 2.00/1.00 addr 1
piixpm0 at pci0 dev 20 function 0 "AMD Hudson-2 SMBus" rev 0x42: SMBus disabled
pcib0 at pci0 dev 20 function 3 "AMD Hudson-2 LPC" rev 0x11
sdhc0 at pci0 dev 20 function 7 "AMD Bolton SD/MMC" rev 0x01: apic 4 int 16
sdhc0: SDHC 2.0, 63 MHz base clock
sdmmc0 at sdhc0: 4-bit, sd high-speed, mmc high-speed, dma
pchb2 at pci0 dev 24 function 0 "AMD AMD64 16h Link Cfg" rev 0x00
pchb3 at pci0 dev 24 function 1 "AMD AMD64 16h Address Map" rev 0x00
pchb4 at pci0 dev 24 function 2 "AMD AMD64 16h DRAM Cfg" rev 0x00
km0 at pci0 dev 24 function 3 "AMD AMD64 16h Misc Cfg" rev 0x00
pchb5 at pci0 dev 24 function 4 "AMD AMD64 16h CPU Power" rev 0x00
pchb6 at pci0 dev 24 function 5 "AMD AMD64 16h Misc Cfg" rev 0x00
isa0 at pcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com0: console
com1 at isa0 port 0x2f8/8 irq 3: ns16550a, 16 byte fifo
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
lpt0 at isa0 port 0x378/4 irq 7
wbsio0 at isa0 port 0x2e/2: NCT5104D rev 0x52
vmm0 at mainbus0: SVM/RVI
uhub2 at uhub1 port 1 configuration 1 interface 0 "Advanced Micro Devices product 0x7900" rev 2.00/0.18 addr 2
vscsi0 at root
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
root on sd0a (cf0732d942f7816d.a) swap on sd0b dump on sd0b

Reply | Threaded
Open this post in threaded view
|

Re: assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Martin Pieuchot
On 12/11/17(Sun) 21:30, Stuart Henderson wrote:
> iked box, GENERIC.MP + WITNESS, -current as of Friday 10th:

Weird, did you tweak "kern.splassert" on this box?   Otherwise it looks
like a major corruption.

> login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file "/src/cvs-openbsd/sys/kern/uipc_socket2.c", line 310
^^^
Looks like one CPU is triggering this.

> splassert: soassertlocked: want 1 have 256
>
> panic: spl assertion failure in soassertlocked
^^^
That can't be coming from the same CPU..




> Starting stack trace...
> Faulted in traceback, aborting...
> panic(splassert: if_down: want 1 have 256
> panic: spl assertion failure in if_down) at
> Faulted in traceback, aborting...
> panicsplassert: if_down: want 1 have 256
> +0x133panic: spl assertion failure in if_down
> Faulted in traceback, aborting...
>
> <repeated a few times>
>
> It's stuck at this point, I can't enter ddb.

Are you running with WITNESS on purpose?  Can you reproduce such a problem
without it?  I'm not saying it's WITNESS's fault, but it's clear that
WITNESS kernels aren't ready for production yet.

Reply | Threaded
Open this post in threaded view
|

Re: assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Stuart Henderson
On 2017/11/12 22:48, Martin Pieuchot wrote:
> On 12/11/17(Sun) 21:30, Stuart Henderson wrote:
> > iked box, GENERIC.MP + WITNESS, -current as of Friday 10th:
>
> Weird, did you tweak "kern.splassert" on this box?   Otherwise is looks
> like a major corruption.

It would have kern.splassert=2. (I know this can cause problems
sometimes, though this would be the first time in 5+ years I've bumped
into it, most of my routers where I have serial console have this set).

> > login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file "/src/cvs-openbsd/sys/kern/uipc_socket2.c", line 310
> ^^^
> Looks like one CPU is triggering this.
>
> > splassert: soassertlocked: want 1 have 256
> >
> > panic: spl assertion failure in soassertlocked
> ^^^
> That can't be coming from the same CPU..
>
>
>
>
> > Starting stack trace...
> > Faulted in traceback, aborting...
> > panic(splassert: if_down: want 1 have 256
> > panic: spl assertion failure in if_down) at
> > Faulted in traceback, aborting...
> > panicsplassert: if_down: want 1 have 256
> > +0x133panic: spl assertion failure in if_down
> > Faulted in traceback, aborting...
> >
> > <repeated a few times>
> >
> > It's stuck at this point, I can't enter ddb.
>
> Are you running with WITNESS on purpose?  Can you reproduce such problem
> without it?  I'm not saying it's WITNESS fault, but it's clear that
> WITNESS kernels aren't ready for production yet.
>

I'm trying to get more information because it had either hung or
panicked previously (it didn't have serial connected at the time and
the machine was needed so it had to be rebooted before I had a chance
to dig into it).

Reply | Threaded
Open this post in threaded view
|

Re: assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Martin Pieuchot
On 12/11/17(Sun) 22:10, Stuart Henderson wrote:

> On 2017/11/12 22:48, Martin Pieuchot wrote:
> > On 12/11/17(Sun) 21:30, Stuart Henderson wrote:
> > > iked box, GENERIC.MP + WITNESS, -current as of Friday 10th:
> >
> > Weird, did you tweak "kern.splassert" on this box?   Otherwise is looks
> > like a major corruption.
>
> It would have kern.splassert=2. (I know this can cause problems
> sometimes, though this would be the first time in 5+ years I've bumped
> into it, most of my routers where I have serial console have this set).

Well, the panic below corresponds to a value of 0 or > 3.

> > > login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file "/src/cvs-openbsd/sys/kern/uipc_socket2.c", line 310
> > ^^^
> > Looks like one CPU is triggering this.
> >
> > > splassert: soassertlocked: want 1 have 256
> > >
> > > panic: spl assertion failure in soassertlocked
> > ^^^
> > That can't be coming from the same CPU..
> >
> >
> >
> >
> > > Starting stack trace...
> > > Faulted in traceback, aborting...
> > > panic(splassert: if_down: want 1 have 256
> > > panic: spl assertion failure in if_down) at
> > > Faulted in traceback, aborting...
> > > panicsplassert: if_down: want 1 have 256
> > > +0x133panic: spl assertion failure in if_down
> > > Faulted in traceback, aborting...
> > >
> > > <repeated a few times>
> > >
> > > It's stuck at this point, I can't enter ddb.
> >
> > Are you running with WITNESS on purpose?  Can you reproduce such problem
> > without it?  I'm not saying it's WITNESS fault, but it's clear that
> > WITNESS kernels aren't ready for production yet.
> >
>
> I'm trying to get more information because it had either hanged or
> panicked previously (it didn't have serial connected at the time and
> the machine was needed so it had to be rebooted before I had chance
> to dig into it).

From which snapshot was the kernel that hung or panicked?

Reply | Threaded
Open this post in threaded view
|

Re: assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Stuart Henderson
On 2017/11/13 08:44, Martin Pieuchot wrote:

> On 12/11/17(Sun) 22:10, Stuart Henderson wrote:
> > On 2017/11/12 22:48, Martin Pieuchot wrote:
> > > On 12/11/17(Sun) 21:30, Stuart Henderson wrote:
> > > > iked box, GENERIC.MP + WITNESS, -current as of Friday 10th:
> > >
> > > Weird, did you tweak "kern.splassert" on this box?   Otherwise is looks
> > > like a major corruption.
> >
> > It would have kern.splassert=2. (I know this can cause problems
> > sometimes, though this would be the first time in 5+ years I've bumped
> > into it, most of my routers where I have serial console have this set).
>
> Well the panic below correspond to a value of 0 or > 3.

Confirmed, it was definitely set to 2.

> > > > login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file "/src/cvs-openbsd/sys/kern/uipc_socket2.c", line 310
> > > ^^^
> > > Looks like one CPU is triggering this.
> > >
> > > > splassert: soassertlocked: want 1 have 256
> > > >
> > > > panic: spl assertion failure in soassertlocked
> > > ^^^
> > > That can't be coming from the same CPU..
> > >
> > >
> > >
> > >
> > > > Starting stack trace...
> > > > Faulted in traceback, aborting...
> > > > panic(splassert: if_down: want 1 have 256
> > > > panic: spl assertion failure in if_down) at
> > > > Faulted in traceback, aborting...
> > > > panicsplassert: if_down: want 1 have 256
> > > > +0x133panic: spl assertion failure in if_down
> > > > Faulted in traceback, aborting...
> > > >
> > > > <repeated a few times>
> > > >
> > > > It's stuck at this point, I can't enter ddb.
> > >
> > > Are you running with WITNESS on purpose?  Can you reproduce such problem
> > > without it?  I'm not saying it's WITNESS fault, but it's clear that
> > > WITNESS kernels aren't ready for production yet.
> > >
> >
> > I'm trying to get more information because it had either hanged or
> > panicked previously (it didn't have serial connected at the time and
> > the machine was needed so it had to be rebooted before I had chance
> > to dig into it).
>
> From which snapshot was the kernel that hanged or panic'd?
>

It was running this:

OpenBSD 6.2-current (GENERIC.MP) #199: Tue Nov  7 18:41:54 MST 2017

I've got it onto a remote control PDU now, now looking for some machine
with an old enough ssh client to be able to connect to the PDU :-|

Which kernel would be most useful to run now?

I have now moved it to -current GENERIC.MP with the "fast path" chunk
removed from amd64/amd64/fpu.c fpu_kernel_enter(), which we still suspect
of maybe having some issues.

Reply | Threaded
Open this post in threaded view
|

Re: assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Martin Pieuchot
On 13/11/17(Mon) 10:03, Stuart Henderson wrote:

> On 2017/11/13 08:44, Martin Pieuchot wrote:
> > On 12/11/17(Sun) 22:10, Stuart Henderson wrote:
> > > On 2017/11/12 22:48, Martin Pieuchot wrote:
> > > > On 12/11/17(Sun) 21:30, Stuart Henderson wrote:
> > > > > iked box, GENERIC.MP + WITNESS, -current as of Friday 10th:
> > > >
> > > > Weird, did you tweak "kern.splassert" on this box?   Otherwise is looks
> > > > like a major corruption.
> > >
> > > It would have kern.splassert=2. (I know this can cause problems
> > > sometimes, though this would be the first time in 5+ years I've bumped
> > > into it, most of my routers where I have serial console have this set).
> >
> > Well the panic below correspond to a value of 0 or > 3.
>
> Confirmed, it was definitely set to 2.

So it seems that two of your CPUs end up looking at/dealing with
corrupted memory...

> > > I'm trying to get more information because it had either hanged or
> > > panicked previously (it didn't have serial connected at the time and
> > > the machine was needed so it had to be rebooted before I had chance
> > > to dig into it).
> >
> > From which snapshot was the kernel that hanged or panic'd?
> >
>
> It was running this:
>
> OpenBSD 6.2-current (GENERIC.MP) #199: Tue Nov  7 18:41:54 MST 2017
>
> I've got it onto a remote control PDU now, now looking for some machine
> with an old enough ssh client to be able to connect to the PDU :-|
>
> Which kernel would be most useful to run now?

-current

> I have now moved it to -current GENERIC.MP with the "fast path chunk
> removed from amd64/amd64/fpu.c fpu_kernel_enter() which we still suspect
> as maybe having some issues.

That's perfect from my point of view.

Reply | Threaded
Open this post in threaded view
|

Re: assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Stuart Henderson
On 2017/11/13 13:17, Martin Pieuchot wrote:

> On 13/11/17(Mon) 10:03, Stuart Henderson wrote:
> > On 2017/11/13 08:44, Martin Pieuchot wrote:
> > > On 12/11/17(Sun) 22:10, Stuart Henderson wrote:
> > > > On 2017/11/12 22:48, Martin Pieuchot wrote:
> > > > > On 12/11/17(Sun) 21:30, Stuart Henderson wrote:
> > > > > > iked box, GENERIC.MP + WITNESS, -current as of Friday 10th:
> > > > >
> > > > > Weird, did you tweak "kern.splassert" on this box?   Otherwise is looks
> > > > > like a major corruption.
> > > >
> > > > It would have kern.splassert=2. (I know this can cause problems
> > > > sometimes, though this would be the first time in 5+ years I've bumped
> > > > into it, most of my routers where I have serial console have this set).
> > >
> > > Well the panic below correspond to a value of 0 or > 3.
> >
> > Confirmed, it was definitely set to 2.
>
> So it seems that two of your CPU end up looking at/dealing with
> corrupted memory...

Is that for sure? 2 does normally print a trace, 3 also drops into ddb.

> > > > I'm trying to get more information because it had either hanged or
> > > > panicked previously (it didn't have serial connected at the time and
> > > > the machine was needed so it had to be rebooted before I had chance
> > > > to dig into it).
> > >
> > > From which snapshot was the kernel that hanged or panic'd?
> > >
> >
> > It was running this:
> >
> > OpenBSD 6.2-current (GENERIC.MP) #199: Tue Nov  7 18:41:54 MST 2017
> >
> > I've got it onto a remote control PDU now, now looking for some machine
> > with an old enough ssh client to be able to connect to the PDU :-|
> >
> > Which kernel would be most useful to run now?
>
> -current
>
> > I have now moved it to -current GENERIC.MP with the "fast path chunk
> > removed from amd64/amd64/fpu.c fpu_kernel_enter() which we still suspect
> > as maybe having some issues.
>
> That's perfect from my point of view.
>

Same after an hour or two uptime, but this time I get some "netlock:
lock not held" from some cpu or other, and some functions in the bits of
the trace that get displayed:

login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file "/src/cvs-openbsd/sys/kern/uipc_socket2.c", line 310
Starting stack trace...
panic() at panic+0x11b
__assert(ffffffff812105d4,ffff80001f898a70,ffffff0063dc5b00,ffffff0061804318) at __assert+0x24
sbappendaddr(0,ffffff0061804318,ffffff005fca5600,0,ffffff0063dc5b00) at sbappendaddrpanic: netlock: lock not held
Faulted in traceback, aborting...
+0x276
pfkey_sendup(4,c,ffff8000008f8b00) at pfkey_sendup+0x75
pfkeyv2_sendmessage(ffffff00617e9160,ffff800000902700,ffffff00617e00a0,1,ffff8000009027d8,2) at pfkeyv2_sendmessage+0x228
pfkeyv2_acquire(ffffff00617e924c,ffffff0067772090,ffffff006777201c,ffffff00617e9160,ffff80001f898dc8) at pfkeyv2_acquire+0x553
ipsp_acquire_sa(ffffff00617e9160,0,ffff8000004d3880,ffff80001f898f20,0) at panic: netlock: lock not heldipsp_acquire_sa
Faulted in traceback, aborting...
+0x4c6panic: netlock: lock not held
Faulted in traceback, aborting...

panic: netlock: lock not held
Faulted in traceback, aborting...
ipsp_spd_lookup(panic: ffffff0005747400,netlock: lock not held
Faulted in traceback, aborting...
0,panic: netlock: lock not heldffff8000004dc900,ffff80001f898fb0
Faulted in traceback, aborting...
,panic: netlock: lock not held
Faulted in traceback, aborting...
0,panic: netlock: lock not held
Faulted in traceback, aborting...
9c519d9d517a98c1) at panic: netlock: lock not held
Faulted in traceback, aborting...
ipsp_spd_lookuppanic: netlock: lock not held+0xcbe
Faulted in traceback, aborting...

panic: netlock: lock not held
Faulted in traceback, aborting...
ip_output_ipsec_lookup(panic: netlock: lock not held
Faulted in traceback, aborting...
ffff80001f898fc0,panic: netlock: lock not held
Faulted in traceback, aborting...
ffffff006276f4d4,panic: netlock: lock not heldffff8000004dc900
Faulted in traceback, aborting...
,panic: netlock: lock not held
Faulted in traceback, aborting...
ffff80001f898fb0,panic: netlock: lock not held
Faulted in traceback, aborting...
0) at panic: netlock: lock not held
Faulted in traceback, aborting...
ip_output_ipsec_lookuppanic: netlock: lock not held+0x34
Faulted in traceback, aborting...

panic: netlock: lock not held
Faulted in traceback, aborting...
ip_output(panic: netlock: lock not held
Faulted in traceback, aborting...
0,panic: 0,netlock: lock not held
Faulted in traceback, aborting...
1,panic: netlock: lock not held
Faulted in traceback, aborting...
ffffff00615ed020panic: netlock: lock not held
Faulted in traceback, aborting...
,panic: ffffff0005747400,netlock: lock not held
Faulted in traceback, aborting...
9c519d9d517a98c1) at panic: ip_outputnetlock: lock not held
Faulted in traceback, aborting...
+0x3e7panic: netlock: lock not held
Faulted in traceback, aborting...

panic: netlock: lock not held
Faulted in traceback, aborting...
ip_forward(panic: netlock: lock not held
Faulted in traceback, aborting...
ffff8000008f9800,panic: netlock: lock not held14,
Faulted in traceback, aborting...
ffff80001f899190,panic: netlock: lock not held
Faulted in traceback, aborting...
ffff80001f89918cpanic: netlock: lock not held
Faulted in traceback, aborting...
) at panic: netlock: lock not held
Faulted in traceback, aborting...
ip_forwardpanic: netlock: lock not held+0x25a
Faulted in traceback, aborting...

panic: netlock: lock not held
Faulted in traceback, aborting...
ip_input_if(panic: netlock: lock not held
ffff8000008f0800,Faulted in traceback, aborting...
panic: ffffff006276f4c6,netlock: lock not held
Faulted in traceback, aborting...
800,panic: netlock: lock not heldffffff0005747400,
Faulted in traceback, aborting...
ffffff0005747400) at panic: netlock: lock not held
ip_input_ifFaulted in traceback, aborting...
+0x5cepanic: netlock: lock not held
Faulted in traceback, aborting...

panic: netlock: lock not held
Faulted in traceback, aborting...
ipv4_input(panic: netlock: lock not held9c519d9d517a98c1
Faulted in traceback, aborting...
,ffffff0005747400) at panic: netlock: lock not heldipv4_input
Faulted in traceback, aborting...
+0x39panic: netlock: lock not held
Faulted in traceback, aborting...

panic: netlock: lock not held
Faulted in traceback, aborting...
panic: netlock: lock not heldether_input(
Faulted in traceback, aborting...
ffff8000008f99f8,panic: netlock: lock not held
Faulted in traceback, aborting...
ffffff0005747400,panic: netlock: lock not held
Faulted in traceback, aborting...
ffff8000000b1f20) at panic: netlock: lock not held
Faulted in traceback, aborting...
ether_inputpanic: +0x2cbnetlock: lock not held
Faulted in traceback, aborting...

panic: netlock: lock not held
Faulted in[halt sent]
PCEngines apu2
coreboot build 20160311
-2064 MB DRAM

Reply | Threaded
Open this post in threaded view
|

Re: assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Sebastien Marie-3
On Mon, Nov 13, 2017 at 12:33:35PM +0000, Stuart Henderson wrote:
>
> Same after an hour or two uptime, but this time I get some "netlock:
> lock not held" from some cpu or other, and some functions in the bits of
> the trace that get displayed:
>
> login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file "/src/cvs-openbsd/sys/kern/uipc_socket2.c", line 310

just a simple question regarding the previous line.

Is the start of the line ("login: ") part of the kernel output, or
is it just the login(1) prompt on the console (printed a long time before
the panic), with you having copied the whole line?

thanks.
--
Sebastien Marie

Reply | Threaded
Open this post in threaded view
|

Re: assertion "_kernel_lock_held()" failed, uipc_socket2.c: ipsec

Martin Pieuchot
In reply to this post by Stuart Henderson
On 13/11/17(Mon) 12:33, Stuart Henderson wrote:
> On 2017/11/13 13:17, Martin Pieuchot wrote:
> > [...]
> > So it seems that two of your CPU end up looking at/dealing with
> > corrupted memory...
>
> Is that for sure? 2 does normally print a trace, 3 also drops into ddb.

But none of them print:

panic: spl assertion failure in soassertlocked.

However it might just be a race because the other CPU just entered panic
and set splassert_ctl to 0.

> Same after an hour or two uptime, but this time I get some "netlock:
> lock not held" from some cpu or other, and some functions in the bits of
> the trace that get displayed:
>
> login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file "/src/cvs-openbsd/sys/kern/uipc_socket2.c", line 310
> Starting stack trace...
> panic() at panic+0x11b
> __assert(ffffffff812105d4,ffff80001f898a70,ffffff0063dc5b00,ffffff0061804318) at __assert+0x24
> sbappendaddr(0,ffffff0061804318,ffffff005fca5600,0,ffffff0063dc5b00) at sbappendaddrpanic: netlock: lock not held

Does the diff below help?  It should in any case reduce the "netlock:
lock not held" noise.

Index: net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.173
diff -u -p -r1.173 pfkeyv2.c
--- net/pfkeyv2.c 12 Nov 2017 14:11:15 -0000 1.173
+++ net/pfkeyv2.c 13 Nov 2017 12:57:36 -0000
@@ -428,12 +428,14 @@ pfkeyv2_sendmessage(void **headers, int
  * Search for promiscuous listeners, skipping the
  * original destination.
  */
+ KERNEL_LOCK();
  LIST_FOREACH(s, &pfkeyv2_sockets, kcb_list) {
  if ((s->flags & PFKEYV2_SOCKETFLAGS_PROMISC) &&
     (s->rcb.rcb_socket != so) &&
     (s->rdomain == rdomain))
  pfkey_sendup(s, packet, 1);
  }
+ KERNEL_UNLOCK();
  m_freem(packet);
  break;
 
@@ -442,6 +444,7 @@ pfkeyv2_sendmessage(void **headers, int
  * Send the message to all registered sockets that match
  * the specified satype (e.g., all IPSEC-ESP negotiators)
  */
+ KERNEL_LOCK();
  LIST_FOREACH(s, &pfkeyv2_sockets, kcb_list) {
  if ((s->flags & PFKEYV2_SOCKETFLAGS_REGISTERED) &&
     (s->rdomain == rdomain)) {
@@ -454,6 +457,7 @@ pfkeyv2_sendmessage(void **headers, int
  }
  }
  }
+ KERNEL_UNLOCK();
  /* Free last/original copy of the packet */
  m_freem(packet);
 
@@ -472,21 +476,25 @@ pfkeyv2_sendmessage(void **headers, int
  goto ret;
 
  /* Send to all registered promiscuous listeners */
+ KERNEL_LOCK();
  LIST_FOREACH(s, &pfkeyv2_sockets, kcb_list) {
  if ((s->flags & PFKEYV2_SOCKETFLAGS_PROMISC) &&
     !(s->flags & PFKEYV2_SOCKETFLAGS_REGISTERED) &&
     (s->rdomain == rdomain))
  pfkey_sendup(s, packet, 1);
  }
+ KERNEL_UNLOCK();
  m_freem(packet);
  break;
 
  case PFKEYV2_SENDMESSAGE_BROADCAST:
  /* Send message to all sockets */
+ KERNEL_LOCK();
  LIST_FOREACH(s, &pfkeyv2_sockets, kcb_list) {
  if (s->rdomain == rdomain)
  pfkey_sendup(s, packet, 1);
  }
+ KERNEL_UNLOCK();
  m_freem(packet);
  break;
  }
@@ -1010,11 +1018,13 @@ pfkeyv2_send(struct socket *so, void *me
  goto ret;
 
  /* Send to all promiscuous listeners */
+ KERNEL_LOCK();
  LIST_FOREACH(bkp, &pfkeyv2_sockets, kcb_list) {
  if ((bkp->flags & PFKEYV2_SOCKETFLAGS_PROMISC) &&
     (bkp->rdomain == rdomain))
  pfkey_sendup(bkp, packet, 1);
  }
+ KERNEL_UNLOCK();
 
  m_freem(packet);
 
@@ -1788,12 +1798,15 @@ pfkeyv2_send(struct socket *so, void *me
  if ((rval = pfdatatopacket(message, len, &packet)) != 0)
  goto ret;
 
- LIST_FOREACH(bkp, &pfkeyv2_sockets, kcb_list)
+ KERNEL_LOCK();
+ LIST_FOREACH(bkp, &pfkeyv2_sockets, kcb_list) {
  if ((bkp != kp) &&
     (bkp->rdomain == rdomain) &&
     (!smsg->sadb_msg_seq ||
     (smsg->sadb_msg_seq == kp->pid)))
  pfkey_sendup(bkp, packet, 1);
+ }
+ KERNEL_UNLOCK();
 
  m_freem(packet);
  } else {
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.134
diff -u -p -r1.134 systm.h
--- sys/systm.h 10 Nov 2017 08:55:49 -0000 1.134
+++ sys/systm.h 13 Nov 2017 12:58:22 -0000
@@ -307,14 +307,14 @@ extern struct rwlock netlock;
 #define NET_ASSERT_WLOCKED() \
 do { \
  int _s = rw_status(&netlock); \
- if (_s != RW_WRITE) \
+ if ((splassert_ctl > 0) && (_s != RW_WRITE)) \
  splassert_fail(RW_WRITE, _s, __func__); \
 } while (0)
 
 #define NET_ASSERT_WUNLOCKED() \
 do { \
  int _s = rw_status(&netlock); \
- if (_s == RW_WRITE) \
+ if ((splassert_ctl > 0) && (_s == RW_WRITE)) \
  splassert_fail(0, RW_WRITE, __func__); \
 } while (0)
 
@@ -324,7 +324,7 @@ do { \
 #define NET_ASSERT_LOCKED() \
 do { \
  int _s = rw_status(&netlock); \
- if (_s != RW_WRITE && _s != RW_READ) \
+ if ((splassert_ctl > 0) && (_s != RW_WRITE && _s != RW_READ)) \
  splassert_fail(RW_READ, _s, __func__); \
 } while (0)