panic witness_warn

classic Classic list List threaded Threaded
5 messages Options
Reply | Threaded
Open this post in threaded view
|

panic witness_warn

Alexander Bluhm
Hi,

On my amd64 regress machine I see a witness_warn panic now.

OpenBSD 6.3-current (GENERIC.MP) #65: Fri Jun  1 17:44:06 MDT 2018
    [hidden email]:/usr/src/sys/arch/amd64/compile/GENERIC.MP

acquiring duplicate lock of same type: "&mp->mnt_lock"
 1st vfslock @ /usr/src/sys/kern/vfs_subr.c:191
 2nd vfslock @ /usr/src/sys/kern/vfs_subr.c:191
Starting stack trace...
witness_checkorder(9,ffffffff819de278,bf,ffff8000006c7840,21) at witness_checkorder+0x63d
_rw_enter(0,ffff8000006c7840,ffff8000006c7800,80000) at _rw_enter+0x56
vfs_busy(ffff800022050180,ffff8000006ca400) at vfs_busy+0x66
dounmount(ffff800022050180,ffff8000ffef1c30,1) at dounmount+0x8a
sys_unmount(160,ffff8000ffef1c30,16) at sys_unmount+0xba
syscall(7f7ffffe7c20) at syscall+0x31d
Xsyscall_untramp(6,0,0,0,0,16) at Xsyscall_untramp+0xc0
end of kernel
end trace frame: 0x7f7ffffe8060, count: 250
End of stack trace.
userret: returning with the following locks held:
exclusive rrwlock inode r = 0 (0xffffff0241320f80) locked @ /usr/src/sys/ufs/ufs/ufs_vnops.c:1559
papnaicn:i cw:i tkneersnse_lw adrina
gStopped at      db_enter+0x5:   popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*397087   5443      0    0x100002          0    0  cp
 174469  59263     73    0x100010          0    2K syslogd
 157117  56285      0     0x14000      0x200    1  reaper
db_enter() at db_enter+0x5
panic() at panic+0x138
witness_warn(ffff8000ffa89690,ffff8000ffffdbc8,5) at witness_warn+0x47e
userret(ffff8000ffffdbc8) at userret+0x190
syscall(1a3e4b16d780) at syscall+0x416
Xsyscall_untramp(6,0,0,0,0,2) at Xsyscall_untramp+0xc0
end of kernel
end trace frame: 0x7f7fffff2dd0, count: 9
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{0}> trace
db_enter() at db_enter+0x5
panic() at panic+0x138
witness_warn(ffff8000ffa89690,ffff8000ffffdbc8,5) at witness_warn+0x47e
userret(ffff8000ffffdbc8) at userret+0x190
syscall(1a3e4b16d780) at syscall+0x416
Xsyscall_untramp(6,0,0,0,0,2) at Xsyscall_untramp+0xc0
end of kernel
end trace frame: 0x7f7fffff2dd0, count: -6
ddb{0}> show panic
witness_warn
ddb{0}> show register
rdi               0xffffffff81ba4030    kprintf_mutex+0x10
rsi                              0x8
rbp               0xffff8000ffa894e0
rbx               0xffff8000ffa89580
rdx               0xffffffff819ce073    dll_speed_table+0x9d3
rcx                            0x201
rax                              0x1
r8                0xffff8000ffa894b0
r9                0x8080808080808080
r10               0xffff8000ffa894d8
r11               0xffffffff81656ca0    x86_bus_space_io_read_1
r12                     0x3000000008
r13               0xffff8000ffa894f0
r14                            0x100
r15               0xffffffff819fcd59    i830_bc_wm_info+0x281
rip               0xffffffff815d5ac5    db_enter+0x5
cs                               0x8
rflags                         0x202
rsp               0xffff8000ffa894e0
ss                              0x10
db_enter+0x5:   popq    %rbp
ddb{0}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
* 5443  397087      1      0  7    0x100002                cp
 53242  267775  96119      0  3    0x10008a  pause         make
 96119    1926  63816      0  3    0x10008a  pause         sh
 63816  148879  54466      0  3    0x10008a  pause         make
  3454   96597  75594      0  3    0x100082  piperd        gzip
 75594  102367  54466      0  3    0x100082  piperd        pax
 54466   56520  51132      0  3        0x82  piperd        perl
 51132  108585  75657      0  3    0x10008a  pause         ksh
 75657  329342  25504      0  3        0x92  select        sshd
 90541   67298      1      0  3    0x100083  ttyin         getty
 67781  180626      1      0  3    0x100083  ttyin         getty
 24187   77492      1      0  3    0x100083  ttyin         getty
   918  224848      1      0  3    0x100083  ttyin         getty
 13705  513022      1      0  3    0x100083  ttyin         getty
 65400  130475      1      0  3    0x100083  ttyin         getty
 57752   49491      1      0  3    0x100098  poll          cron
 18699  445424      0      0  3     0x14280  nfsidl        nfsio
 28801  439290      0      0  3     0x14280  nfsidl        nfsio
 74268  153048      0      0  3     0x14280  nfsidl        nfsio
 32886  255688      0      0  3     0x14280  nfsidl        nfsio
 21303  398601      1     99  3    0x100090  poll          sndiod
 16832  417143      1    110  3    0x100090  poll          sndiod
 16311  337353  72178     95  3    0x100092  kqread        smtpd
 71318   47528  72178    103  3    0x100092  kqread        smtpd
 96699   35402  72178     95  3    0x100092  kqread        smtpd
 29558  384597  72178     95  3    0x100092  kqread        smtpd
 52970  409419  72178     95  3    0x100092  kqread        smtpd
  6162  119654  72178     95  3    0x100092  kqread        smtpd
 72178   43800      1      0  3    0x100080  kqread        smtpd
 25504  200285      1      0  3        0x80  select        sshd
 97323  463909      0      0  3     0x14200  acct          acct
 81836  331635      1      0  3    0x100080  poll          ntpd
 31788  338400  47559     83  3    0x100092  poll          ntpd
 47559  133357      1     83  3    0x100092  poll          ntpd
 74235  387905  53359     74  3    0x100092  bpf           pflogd
 53359   63978      1      0  3        0x80  netio         pflogd
 59263  174469  56112     73  7    0x100010                syslogd
 56112   38159      1      0  3    0x100082  netio         syslogd
 40096  442729      1     77  3    0x100090  poll          dhclient
 91722  321381      1      0  3        0x80  poll          dhclient
 28214  447838  11201    115  3    0x100092  kqread        slaacd
 79970  333791  11201    115  3    0x100092  kqread        slaacd
 11201  495154      1      0  3        0x80  kqread        slaacd
 97438  145610      0      0  3     0x14200  pgzero        zerothread
 26642  375790      0      0  3     0x14200  aiodoned      aiodoned
 29468  290669      0      0  3     0x14200  syncer        update
 62752  343099      0      0  3     0x14200  cleaner       cleaner
 56285  157117      0      0  7     0x14200                reaper
 94885  518466      0      0  3     0x14200  pgdaemon      pagedaemon
 89841  281877      0      0  3     0x14200  bored         crynlk
 38064  106650      0      0  3     0x14200  bored         crypto
 42915  382058      0      0  3     0x14200  bored         sensors
 31848  220189      0      0  3     0x14200  usbtsk        usbtask
 12079   83586      0      0  3     0x14200  usbatsk       usbatsk
 47059  478478      0      0  3  0x40014200  acpi0         acpi0
 89354  398471      0      0  7  0x40014200                idle3
 10192  384745      0      0  3  0x40014200                idle2
 79805   86966      0      0  3  0x40014200                idle1
 67313  137756      0      0  3     0x14200  bored         softnet
   756   98492      0      0  3     0x14200  bored         systqmp
 36353   87771      0      0  3     0x14200  bored         systq
 97508    7210      0      0  3  0x40014200  bored         softclock
 78082  328642      0      0  3  0x40014200                idle0
     1    9984      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb{0}> machine ddb 1

Here it hangs.

bluhm

Reply | Threaded
Open this post in threaded view
|

Re: panic witness_warn

Alexander Bluhm
On Mon, Jun 04, 2018 at 01:23:53AM +0200, Alexander Bluhm wrote:
> On my amd64 regress machine I see a witness_warn panic now.

It is triggered by /usr/src/regress/sys/kern/mount.  With visa@'s
improved stack trace I see:

======== run-regress-unmount-busy ========
cp -r /usr /mnt/regress-mount &
sleep 5
sync
sleep 10
rm -rf /mnt/regress-mount/usr &
sleep .1
umount -f /mnt/regress-mount
userret: returning with the following locks helfsck -y /dev/rvnd:d0a 2>&1 | tee f
esck-clean.log
xclusive rrwlock inode r = 0 (0xffffff023d492b48) locked @ /usr/src/sys/ufs/ufs/ufs_vnops.c:1559
#0  witness_lock+0x254
#1  _rw_enter+0x29b
#2  _rrw_enrm: terfts_read: +0x3e
#3  VOP_LOCK+0x3d
#4  vn_lock+0x34
#5  vget+0xf7
#6  cache_lookup+0x217
#7  ufs_lookup+0x112
#8  VOP_LOOKUP+0x4f
#9  vfs_lookup+0x27e
#directory          No such file or
10 namei+0x226
#11 vn_open+0xcf
#12 doopenat+0x1af
#13 syscall+0x32a
#14 Xsyscall_untramp+0xc0
panic: witness_warn
Stopped at      db_enter+0x12:  popq    %r11
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*340900  45836      0    0x100003          0    2  cp
db_enter() at db_enter+0x12
panic() at panic+0x138
witness_warn(4daf31f665ef77b2,ffff800021f74290,ffff8000ffff6960) at witness_war
n+0x4fb
userret(184acb3033f01432) at userret+0x1a0
syscall(e44e1a254de07323) at syscall+0x423
Xsyscall_untramp(6,0,0,0,0,2) at Xsyscall_untramp+0xc0
end of kernel
end trace frame: 0x7f7ffffd19b0, count: 9
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{2}> show locks
exclusive rrwlock inode r = 0 (0xffffff023d492b48) locked @ /usr/src/sys/ufs/uf
s/ufs_vnops.c:1559
#0  witness_lock+0x254
#1  _rw_enter+0x29b
#2  _rrw_enter+0x3e
#3  VOP_LOCK+0x3d
#4  vn_lock+0x34
#5  vget+0xf7
#6  cache_lookup+0x217
#7  ufs_lookup+0x112
#8  VOP_LOOKUP+0x4f
#9  vfs_lookup+0x27e
#10 namei+0x226
#11 vn_open+0xcf
#12 doopenat+0x1af
#13 syscall+0x32a
#14 Xsyscall_untramp+0xc0
ddb{2}> show register
rdi               0xffffffff81ca2888    kprintf_mutex+0x10
rsi                              0x8
rbp               0xffff800021f740e0
rbx               0xffff800021f74180
rdx               0xffffffff81bfddd6    intel_hpll_vco.cl_vco+0x3e6
rcx                            0x201
rax                              0x1
r8                0xffff800021f740b0
r9                0x8080808080808080
r10               0x3df1125987f3e8ca
r11               0x1a70d75cd6a8e458
r12                     0x3000000008
r13               0xffff800021f740f0
r14                            0x100
r15               0xffffffff81c0463f    nviic_ids+0x7df
rip               0xffffffff81767d82    db_enter+0x12
cs                               0x8
rflags                         0x202
rsp               0xffff800021f740d8
ss                              0x10
db_enter+0x12:  popq    %r11
ddb{2}>

All other CPUs are in the idle thread.

bluhm

Reply | Threaded
Open this post in threaded view
|

Re: panic witness_warn

Alexander Bluhm
On Mon, Jun 04, 2018 at 08:30:55PM +0200, Alexander Bluhm wrote:
> On Mon, Jun 04, 2018 at 01:23:53AM +0200, Alexander Bluhm wrote:
> > On my amd64 regress machine I see a witness_warn panic now.
>
> It is triggered by /usr/src/regress/sys/kern/mount.  With visa@'s
> improved stack trace I see:

My serial console server mangled the output.  Here is better one
from ddb> dmesg.

userret: returning with the following locks held:
exclusive rrwlock inode r = 0 (0xffffff023d492b48) locked @ /usr/src/sys/ufs/uf
s/ufs_vnops.c:1559
#0  witness_lock+0x254
#1  _rw_enter+0x29b
#2  _rrw_enter+0x3e
#3  VOP_LOCK+0x3d
#4  vn_lock+0x34
#5  vget+0xf7
#6  cache_lookup+0x217
#7  ufs_lookup+0x112
#8  VOP_LOOKUP+0x4f
#9  vfs_lookup+0x27e
#10 namei+0x226
#11 vn_open+0xcf
#12 doopenat+0x1af
#13 syscall+0x32a
#14 Xsyscall_untramp+0xc0
panic: witness_warn
Stopped at      db_enter+0x12:  popq    %r11
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*340900  45836      0    0x100003          0    2  cp
db_enter() at db_enter+0x12
panic() at panic+0x138
witness_warn(4daf31f665ef77b2,ffff800021f74290,ffff8000ffff6960) at witness_war
n+0x4fb
userret(184acb3033f01432) at userret+0x1a0
syscall(e44e1a254de07323) at syscall+0x423
Xsyscall_untramp(6,0,0,0,0,2) at Xsyscall_untramp+0xc0
end of kernel
end trace frame: 0x7f7ffffd19b0, count: 9
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.

Reply | Threaded
Open this post in threaded view
|

Re: panic witness_warn

Alexander Bluhm
On Mon, Jun 04, 2018 at 08:53:49PM +0200, Alexander Bluhm wrote:

> userret: returning with the following locks held:
> exclusive rrwlock inode r = 0 (0xffffff023d492b48) locked @ /usr/src/sys/ufs/uf
> s/ufs_vnops.c:1559
> #0  witness_lock+0x254
> #1  _rw_enter+0x29b
> #2  _rrw_enter+0x3e
> #3  VOP_LOCK+0x3d
> #4  vn_lock+0x34
> #5  vget+0xf7
> #6  cache_lookup+0x217
> #7  ufs_lookup+0x112
> #8  VOP_LOOKUP+0x4f
> #9  vfs_lookup+0x27e
> #10 namei+0x226
> #11 vn_open+0xcf
> #12 doopenat+0x1af
> #13 syscall+0x32a
> #14 Xsyscall_untramp+0xc0
> panic: witness_warn

We are leaking a vnode in namei().  The mount check was copied from
NetBSD in 2003.  Later in 2006 they added a vput().  As we have not
locked the directory vnode, call vrele() there.

ok?

bluhm

Index: kern/vfs_lookup.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/vfs_lookup.c,v
retrieving revision 1.69
diff -u -p -r1.69 vfs_lookup.c
--- kern/vfs_lookup.c 2 May 2018 02:24:56 -0000 1.69
+++ kern/vfs_lookup.c 4 Jul 2018 10:44:48 -0000
@@ -203,6 +203,8 @@ fail:
  if (!dp->v_mount) {
  /* Give up if the directory is no longer mounted */
  pool_put(&namei_pool, cnp->cn_pnbuf);
+ vrele(dp);
+ ndp->ni_vp = NULL;
  return (ENOENT);
  }
  cnp->cn_nameptr = cnp->cn_pnbuf;

Reply | Threaded
Open this post in threaded view
|

Re: panic witness_warn

Kenneth R Westerback-2
On Wed, Jul 04, 2018 at 05:48:21PM +0200, Alexander Bluhm wrote:

> On Mon, Jun 04, 2018 at 08:53:49PM +0200, Alexander Bluhm wrote:
> > userret: returning with the following locks held:
> > exclusive rrwlock inode r = 0 (0xffffff023d492b48) locked @ /usr/src/sys/ufs/uf
> > s/ufs_vnops.c:1559
> > #0  witness_lock+0x254
> > #1  _rw_enter+0x29b
> > #2  _rrw_enter+0x3e
> > #3  VOP_LOCK+0x3d
> > #4  vn_lock+0x34
> > #5  vget+0xf7
> > #6  cache_lookup+0x217
> > #7  ufs_lookup+0x112
> > #8  VOP_LOOKUP+0x4f
> > #9  vfs_lookup+0x27e
> > #10 namei+0x226
> > #11 vn_open+0xcf
> > #12 doopenat+0x1af
> > #13 syscall+0x32a
> > #14 Xsyscall_untramp+0xc0
> > panic: witness_warn
>
> We are leaking a vnode in namei().  The mount check was copied from
> NetBSD in 2003.  Later in 2006 they added a vput().  As we have not
> locked the directory vnode, call vrele() there.
>
> ok?
>
> bluhm
>
> Index: kern/vfs_lookup.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/vfs_lookup.c,v
> retrieving revision 1.69
> diff -u -p -r1.69 vfs_lookup.c
> --- kern/vfs_lookup.c 2 May 2018 02:24:56 -0000 1.69
> +++ kern/vfs_lookup.c 4 Jul 2018 10:44:48 -0000
> @@ -203,6 +203,8 @@ fail:
>   if (!dp->v_mount) {
>   /* Give up if the directory is no longer mounted */
>   pool_put(&namei_pool, cnp->cn_pnbuf);
> + vrele(dp);
> + ndp->ni_vp = NULL;
>   return (ENOENT);
>   }
>   cnp->cn_nameptr = cnp->cn_pnbuf;
>

Makes sense to me. ok krw@ fwiw.

.... Ken