panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
7 messages Options
Reply | Threaded
Open this post in threaded view
|

panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"

Hrvoje Popovski
hi all,

sending ip6 traffic from host connected to vlan200 on ix0 to host
connected to vlan300 on ix1 and running

ifconfig vlan200 destroy
ifconfig ix0 down
sh netstart vlan200
sh netstart ix0
ifconfing vlan300 destroy
ifconfig ix1 down
sh netstart vlan300
sh netstart ix1
sh netstart

triggers panic.

i'm running current from today with WITNESS. there aren't any traffic
over em interfaces except em0 which is ssh interface.

i will leave box in ddb if something else is needed

em2: watchdog: head 1 tail 0 TDH 1 TDT 1
panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"
failed:4
Stopped at      db_enter+0x5:   popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
  80887  95109      0         0x3          0    9  ifconfig
*360150  85115      0     0x14000      0x200    2  softnet
  23357  56847      0     0x14000 0x40000210    0  sbar
db_enter() at db_enter+0x5
panic() at panic+0x141
__assert(ffffffff813a2004,ffff800023bc4de0,0,ffff800000504000) at
__assert+0x24

em_stop(ffff800000504000,7) at em_stop+0x118
em_init(1) at em_init+0x25
em_watchdog(0) at em_watchdog+0xc1
if_watchdog_task(ffffffff810ff560) at if_watchdog_task+0xa4
taskq_thread(0) at taskq_thread+0x67
end trace frame: 0x0, count: 7
https://www.openbsd.org/ddb.html describes the minimum info required in
bug reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{2}>

ddb{2}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 95109   80887  72047      0  7         0x3                ifconfig
 72047  294283  94572      0  3    0x10008b  pause         sh
 94572   16314      1      0  3    0x10008b  pause         ksh
 20232  104630      1      0  3    0x100083  ttyin         getty
 18758  200335      1      0  3    0x100083  ttyin         getty
 80504  409360      1      0  3    0x100083  ttyin         getty
 32325  245465      1      0  3    0x100083  ttyin         getty
  5943  359471      1      0  3    0x100083  ttyin         getty
 71304  153417      1      0  3    0x100098  poll          cron
 95447  432582  92345     95  3    0x100092  kqread        smtpd
 78232  180428  92345    103  3    0x100092  kqread        smtpd
 29625  187955  92345     95  3    0x100092  kqread        smtpd
 32384   37526  92345     95  3    0x100092  kqread        smtpd
 57902   72505  92345     95  3    0x100092  kqread        smtpd
 20724  373549  92345     95  3    0x100092  kqread        smtpd
 92345    1586      1      0  3    0x100080  kqread        smtpd
 56700   88302      1      0  3        0x80  select        sshd
 49765  237636  21942     83  3    0x100092  poll          ntpd
 21942  429768   5538     83  3    0x100092  poll          ntpd
  5538  176012      1      0  3    0x100080  poll          ntpd
 77148  119832  60452     73  3    0x100010  biowait       syslogd
 60452  451130      1      0  3    0x100082  netio         syslogd
 49364  146738      0      0  3     0x14200  pgzero        zerothread
 39018   42215      0      0  3     0x14200  aiodoned      aiodoned
 75816  380729      0      0  3     0x14200  syncer        update
 26056  203298      0      0  3     0x14200  cleaner       cleaner
 15695  414254      0      0  3     0x14200  reaper        reaper
 31352  212123      0      0  3     0x14200  pgdaemon      pagedaemon
 93280  172572      0      0  3     0x14200  bored         crynlk
 48169  138621      0      0  3     0x14200  bored         crypto
 24870   43591      0      0  3     0x14200  usbtsk        usbtask
 91665  128470      0      0  3     0x14200  usbatsk       usbatsk
 90806  283372      0      0  3  0x40014200  acpi0         acpi0
 30387  176727      0      0  7  0x40014200                idle11
 54717  168183      0      0  7  0x40014200                idle10
 44439  453648      0      0  3  0x40014200                idle9
 92750  223373      0      0  7  0x40014200                idle8
 33752  166086      0      0  7  0x40014200                idle7
 49635   88837      0      0  7  0x40014200                idle6
 78684  152787      0      0  7  0x40014200                idle5
 87214  263209      0      0  7  0x40014200                idle4
 18455  148347      0      0  7  0x40014200                idle3
 16594   77843      0      0  3  0x40014200                idle2
 82707   27491      0      0  7  0x40014200                idle1
 51618  356368      0      0  3     0x14200  bored         sensors
*85115  360150      0      0  7     0x14200                softnet
 38207  341896      0      0  3     0x14200  bored         systqmp
 17776  250136      0      0  3     0x14200  bored         systq
 55761  357215      0      0  3  0x40014200  bored         softclock
 84701  248559      0      0  3  0x40014200                idle0
 56847   23357      0      0  7  0x40014210                sbar
     1  264967      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb{2}>


ddb{0}> show witness /b
Number of known direct relationships is 207

Lock order reversal between "&mp->mnt_lock"(rwlock) and
"&ip->i_lock"(rrwlock)!

Lock order "&mp->mnt_lock"(rwlock) -> "&ip->i_lock"(rrwlock) first seen at:
#0  witness_checkorder+0x466
#1  _rw_enter+0x56
#2  _rrw_enter+0x32
#3  VOP_LOCK+0x31
#4  vn_lock+0x36
#5  vget+0xba
#6  cache_lookup+0x173
#7  ufs_lookup+0x10e
#8  VOP_LOOKUP+0x33
#9  vfs_lookup+0x26e
#10 namei+0x1eb
#11 ffs_mount+0x11d
#12 sys_mount+0x2da
#13 syscall+0x279

Lock order "&ip->i_lock"(rrwlock) -> "&mp->mnt_lock"(rwlock) first seen at:
#0  witness_checkorder+0x466
#1  _rw_enter+0x56
#2  vfs_busy+0x64
#3  vfs_lookup+0x38b
#4  namei+0x1eb
#5  doreadlinkat+0x6d
#6  syscall+0x279


ddb{2}> mach ddbcpu 0
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff8155c12b) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: 12
0x41cb8c419c524153:
ddb{0}> trace
x86_ipi_db(ffffffff8155c12b) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: -3
0x41cb8c419c524153:
ddb{0}> mach ddbcpu 1
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: 12
0x41cb8c419c524153:
ddb{1}> trace
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: -3
0x41cb8c419c524153:
ddb{1}> mach ddbcpu 3
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: 12
0x41cb8c419c524153:
ddb{3}> trace
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: -3
0x41cb8c419c524153:
ddb{3}> mach ddbcpu 4
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: 12
0x41cb8c419c524153:
ddb{4}> trace
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: -3
0x41cb8c419c524153:
ddb{4}> mach ddbcpu 5
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: 12
0x41cb8c419c524153:
ddb{5}> trace
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: -3
0x41cb8c419c524153:
ddb{5}> mach ddbcpu 6
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: 12
0x41cb8c419c524153:
ddb{6}> trace
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: -3
0x41cb8c419c524153:
ddb{6}> mach ddbcpu 7
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: 12
0x41cb8c419c524153:
ddb{7}> trace
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: -3
0x41cb8c419c524153:
ddb{7}> mach ddbcpu 8
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: 12
0x41cb8c419c524153:
ddb{8}> trace
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: -3
0x41cb8c419c524153:
ddb{8}> mach ddbcpu 9
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: 12
0x41cb8c419c524153:
ddb{9}> trace
x86_ipi_db(ffffffff815cba25) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x9da83405c75250cc, count: -3
0x41cb8c419c524153:

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"

Hrvoje Popovski
On 29.11.2017. 18:25, Hrvoje Popovski wrote:

> hi all,
>
> sending ip6 traffic from host connected to vlan200 on ix0 to host
> connected to vlan300 on ix1 and running
>
> ifconfig vlan200 destroy
> ifconfig ix0 down
> sh netstart vlan200
> sh netstart ix0
> ifconfing vlan300 destroy
> ifconfig ix1 down
> sh netstart vlan300
> sh netstart ix1
> sh netstart


while trying to repeat panic, i got a new one:

panic: kernel diagnostic assertion "ifp != NULL" failed: file
"/usr/src/sys/net0
Stopped at      db_enter+0x5:   popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
  58285  46948      0         0x3          0    2  ifconfig
* 20256  49787      0     0x14000 0x40000200    0  softclock
db_enter() at db_enter+0x5
panic() at panic+0x141
__assert(ffffffff8176d124,ffff800023bb2a80,0,ffffff0787e214e8) at
__assert+0x24

arptfree(ffffffff81b99008) at arptfree+0x94
arptimer(ffff800023bb2ae0) at arptimer+0x45
softclock_thread(0) at softclock_thread+0xf2
end trace frame: 0x0, count: 9
https://www.openbsd.org/ddb.html describes the minimum info required in
bug reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{0}>

ddb{0}> show panic
kernel diagnostic assertion "ifp != NULL" failed: file
"/usr/src/sys/netinet/if_ether.c", line 700

ddb{0}> trace
db_enter() at db_enter+0x5
panic() at panic+0x141
__assert(ffffffff8176d124,ffff800023bb2a80,0,ffffff0787e214e8) at
__assert+0x24

arptfree(ffffffff81b99008) at arptfree+0x94
arptimer(ffff800023bb2ae0) at arptimer+0x45
softclock_thread(0) at softclock_thread+0xf2
end trace frame: 0x0, count: -6

ddb{0}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 46948   58285  98737      0  7         0x3                ifconfig
 98737  299151   7396      0  3    0x10008b  pause         sh
  7396  111661      1      0  3    0x10008b  pause         ksh
 50249   31490      1      0  3    0x100083  ttyin         getty
 54529  249519      1      0  3    0x100083  ttyin         getty
  2248  149523      1      0  3    0x100083  ttyin         getty
 61430  171168      1      0  3    0x100083  ttyin         getty
 59068  276277      1      0  3    0x100083  ttyin         ksh
 26852  101272      1      0  3    0x100098  poll          cron
 14944  512381  73354     95  3    0x100092  kqread        smtpd
 80524  250998  73354    103  3    0x100092  kqread        smtpd
 32951  502032  73354     95  3    0x100092  kqread        smtpd
 22657  195980  73354     95  3    0x100092  kqread        smtpd
 74698  178224  73354     95  3    0x100092  kqread        smtpd
 99882  341808  73354     95  3    0x100092  kqread        smtpd
 73354  145764      1      0  3    0x100080  kqread        smtpd
 29057  499576      1      0  3        0x80  select        sshd
 51659  398074  68617     83  3    0x100092  poll          ntpd
 68617  421110  63601     83  3    0x100092  poll          ntpd
 63601  484085      1      0  3    0x100080  poll          ntpd
 77537  510901  84046     73  3    0x100090  kqread        syslogd
 84046  350936      1      0  3    0x100082  netio         syslogd
 60988  230583      0      0  3     0x14200  pgzero        zerothread
   675  145644      0      0  3     0x14200  aiodoned      aiodoned
 96938  168167      0      0  3     0x14200  syncer        update
 29269  157260      0      0  3     0x14200  cleaner       cleaner
 15452  491606      0      0  3     0x14200  reaper        reaper
 97237  171400      0      0  3     0x14200  pgdaemon      pagedaemon
 89550  163022      0      0  3     0x14200  bored         crynlk
  5855  159961      0      0  3     0x14200  bored         crypto
  6317  136103      0      0  3     0x14200  usbtsk        usbtask
 16949  180382      0      0  3     0x14200  usbatsk       usbatsk
 40494  441014      0      0  3  0x40014200  acpi0         acpi0
 56960  481184      0      0  7  0x40014200                idle11
 90216  140441      0      0  7  0x40014200                idle10
 97249  341735      0      0  7  0x40014200                idle9
 70430  462543      0      0  7  0x40014200                idle8
 24007   34418      0      0  7  0x40014200                idle7
 36169  124785      0      0  7  0x40014200                idle6
 67431  405241      0      0  7  0x40014200                idle5
 73933  403886      0      0  7  0x40014200                idle4
  4971   24693      0      0  7  0x40014200                idle3
 21819  106720      0      0  3  0x40014200                idle2
 40204  160500      0      0  7  0x40014200                idle1
 71283   24941      0      0  3     0x14200  bored         sensors
 25282    9665      0      0  3     0x14200  netlock       softnet
 62903  207692      0      0  3     0x14200  bored         systqmp
 34390  385115      0      0  3     0x14200  bored         systq
*49787   20256      0      0  7  0x40014200                softclock
 46513  306769      0      0  3  0x40014200                idle0
 98047  290231      0      0  3     0x14200  bored         sbar
     1  418028      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb{0}>

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"

Martin Pieuchot
In reply to this post by Hrvoje Popovski
On 29/11/17(Wed) 18:25, Hrvoje Popovski wrote:

> hi all,
>
> sending ip6 traffic from host connected to vlan200 on ix0 to host
> connected to vlan300 on ix1 and running
>
> ifconfig vlan200 destroy
> ifconfig ix0 down
> sh netstart vlan200
> sh netstart ix0
> ifconfing vlan300 destroy
> ifconfig ix1 down
> sh netstart vlan300
> sh netstart ix1
> sh netstart
>
> triggers panic.
>
> i'm running current from today with WITNESS. there aren't any traffic
> over em interfaces except em0 which is ssh interface.
>
> i will leave box in ddb if something else is needed
>
> em2: watchdog: head 1 tail 0 TDH 1 TDT 1
> panic() at panic+0x141
> __assert(ffffffff813a2004,ffff800023bc4de0,0,ffff800000504000) at
> __assert+0x24
>
> em_stop(ffff800000504000,7) at em_stop+0x118
> em_init(1) at em_init+0x25
> em_watchdog(0) at em_watchdog+0xc1
> if_watchdog_task(ffffffff810ff560) at if_watchdog_task+0xa4
> taskq_thread(0) at taskq_thread+0x67

This is a race in the em(4) driver.  When em_stop() sleeps in the
barriers a context switch can occurs.  If this context switch schedule
a thread that was already sleeping at the same point, like ifconfig,
you get this panic.

If believe em_init() should only be called if IFF_RUNNING is still set.

If you agree on that we should generalize this fix.

Index: if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.336
diff -u -p -r1.336 if_em.c
--- if_em.c 25 Jul 2017 20:45:18 -0000 1.336
+++ if_em.c 7 Dec 2017 11:56:19 -0000
@@ -775,7 +775,8 @@ em_watchdog(struct ifnet *ifp)
     sc->sc_tx_desc_head, sc->sc_tx_desc_tail,
     E1000_READ_REG(&sc->hw, TDH), E1000_READ_REG(&sc->hw, TDT));
 
- em_init(sc);
+ if (ifp->if_flags & IFF_RUNNING)
+ em_init(sc);
 
  sc->watchdog_events++;
 }

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"

Martin Pieuchot
In reply to this post by Hrvoje Popovski
On 30/11/17(Thu) 12:49, Hrvoje Popovski wrote:

> On 29.11.2017. 18:25, Hrvoje Popovski wrote:
> > hi all,
> >
> > sending ip6 traffic from host connected to vlan200 on ix0 to host
> > connected to vlan300 on ix1 and running
> >
> > ifconfig vlan200 destroy
> > ifconfig ix0 down
> > sh netstart vlan200
> > sh netstart ix0
> > ifconfing vlan300 destroy
> > ifconfig ix1 down
> > sh netstart vlan300
> > sh netstart ix1
> > sh netstart
>
>
> while trying to repeat panic, i got a new one:

Without the trace of the 'ifconfig' process it's hard to know where is
the race.  See the output of "ps /o":

> Stopped at      db_enter+0x5:   popq    %rbp
>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
>   58285  46948      0         0x3          0    2  ifconfig
> * 20256  49787      0     0x14000 0x40000200    0  softclock

Make sure to include the trace of the two active processes.  Since CPU0
entered DDB you already reported the 'softclock' one.  To get the
'ifconfig' one, type the following next time:

ddb{0}> tr /up 0t58285

Here '58285' is the TID (Thread ID) of the process.

Same thing for the em(4) panic you reported before.  I made an educated
guess about which race can occur.   But having the trace of all active
processes help a lot!

Thanks for you great reports,
Martin

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"

Hrvoje Popovski
On 7.12.2017. 13:07, Martin Pieuchot wrote:

> On 30/11/17(Thu) 12:49, Hrvoje Popovski wrote:
>> On 29.11.2017. 18:25, Hrvoje Popovski wrote:
>>> hi all,
>>>
>>> sending ip6 traffic from host connected to vlan200 on ix0 to host
>>> connected to vlan300 on ix1 and running
>>>
>>> ifconfig vlan200 destroy
>>> ifconfig ix0 down
>>> sh netstart vlan200
>>> sh netstart ix0
>>> ifconfing vlan300 destroy
>>> ifconfig ix1 down
>>> sh netstart vlan300
>>> sh netstart ix1
>>> sh netstart
>>
>>
>> while trying to repeat panic, i got a new one:
>
> Without the trace of the 'ifconfig' process it's hard to know where is
> the race.  See the output of "ps /o":
>
>> Stopped at      db_enter+0x5:   popq    %rbp
>>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
>>   58285  46948      0         0x3          0    2  ifconfig
>> * 20256  49787      0     0x14000 0x40000200    0  softclock
>
> Make sure to include the trace of the two active processes.  Since CPU0
> entered DDB you already reported the 'softclock' one.  To get the
> 'ifconfig' one, type the following next time:
>
> ddb{0}> tr /up 0t58285
>
> Here '58285' is the TID (Thread ID) of the process.
>
> Same thing for the em(4) panic you reported before.  I made an educated
> guess about which race can occur.   But having the trace of all active
> processes help a lot!
>
> Thanks for you great reports,
> Martin
>

yes yes, you are right, sorry for lousy reports.
i'm hitting box with rhythm stick right now; Das ist gut! C'est fantastique!

i hope that it will panic today .. :)

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"

Hrvoje Popovski
In reply to this post by Martin Pieuchot
On 7.12.2017. 13:07, Martin Pieuchot wrote:
> Same thing for the em(4) panic you reported before.  I made an educated
> guess about which race can occur.   But having the trace of all active
> processes help a lot!

hi,

i had to trigger same panic before applying your em diff and here it is:

panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"
failed: file "/usr/src/sys/dev/pci/if_em.c", line 1534
Stopped at      db_enter+0x5:   popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
 413493  38051      0         0x3          0    2  ifconfig
 424155  37800     73    0x100010          0    3  syslogd
*438295  18225      0     0x14000      0x200    1K softnet
db_enter() at db_enter+0x5
panic() at panic+0x141
__assert(ffffffff810ecd54,ffff800023bc3e10,0,ffff800000504000) at
__assert+0x24

em_stop(ffff800000504000,7) at em_stop+0x118
em_init(2) at em_init+0x25
em_watchdog(0) at em_watchdog+0xc1
if_watchdog_task(ffffffff81668600) at if_watchdog_task+0xa4
taskq_thread(0) at taskq_thread+0x67
end trace frame: 0x0, count: 7
https://www.openbsd.org/ddb.html describes the minimum info required in
bug reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{1}>


db{1}> ps /o
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
 413493  38051      0         0x3          0    2  ifconfig
 424155  37800     73    0x100010          0    3  syslogd
*438295  18225      0     0x14000      0x200    1K softnet
ddb{1}>


ddb{1}> tr /up 0t413493
ffff800023c63ee0(ffffffff81b61278,0,0,0,44400080000,1) at 0xffff800023c63ee0
end(uvm_fault(0xffffffff81afedf8, 0xfffffffffffffff7, 0, 1) -> e
kernel: page fault trap, code=0
Faulted in DDB; continuing...
ddb{1}>


ddb{1}> tr /up 0t424155
w_locklistdata(0,0,0,0,0,1000000000612) at w_locklistdata+0x42460
mi_child_return.child_retval(6816fd006816fa,0,0,0,0,0) at
mi_child_return.child
_retval+0x38c
Bad frame pointer: 0xffffffff81a91ec8
end trace frame: 0xffffffff81a91ec8, count: -2
ddb{1}>


ddb{1}> tr /p 0t438295
em_devices(632e7361635f66,692f6963702f7665,642f7379732f6372,732f7273752f000a,65
63697665642074,6573657220746f6e) at em_devices+0x31c
end of kernel
end trace frame: 0x66693e2d70666928, count: -1


ddb{1}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 38051  413493  98220      0  7         0x3                ifconfig
 98220  510053  42401      0  3    0x10008b  pause         sh
 42401  505610  29809      0  3    0x10008b  pause         sh
 29809   61372      1      0  3    0x10008b  pause         ksh
 84408  518968      1      0  3    0x100083  ttyin         getty
 75374  407043      1      0  3    0x100083  ttyin         getty
 90775  272436      1      0  3    0x100083  ttyin         getty
  2696  405567      1      0  3    0x100083  ttyin         getty
 40449   47349      1      0  3    0x100083  ttyin         getty
 43210  337136      1      0  3    0x100098  poll          cron
  5666  433796  48654     95  3    0x100092  kqread        smtpd
 46164   34892  48654    103  3    0x100092  kqread        smtpd
 54175  260135  48654     95  3    0x100092  kqread        smtpd
 93781  236681  48654     95  3    0x100092  kqread        smtpd
 19987  267311  48654     95  3    0x100092  kqread        smtpd
 94891  422493  48654     95  3    0x100092  kqread        smtpd
 48654  385403      1      0  3    0x100080  kqread        smtpd
  3471  358479      1      0  3        0x80  select        sshd
 28005  289816  85351     83  3    0x100092  poll          ntpd
 85351  307880  53191     83  3    0x100092  poll          ntpd
 53191  361514      1      0  3    0x100080  poll          ntpd
 37800  424155  63169     73  7    0x100010                syslogd
 63169  409917      1      0  3    0x100082  netio         syslogd
  5807  224569      0      0  3     0x14200  pgzero        zerothread
 49859      23      0      0  3     0x14200  aiodoned      aiodoned
 94546  207693      0      0  3     0x14200  syncer        update
 81519  201465      0      0  3     0x14200  cleaner       cleaner
 46403   79488      0      0  3     0x14200  reaper        reaper
 58020  281411      0      0  3     0x14200  pgdaemon      pagedaemon
 74507  148042      0      0  3     0x14200  bored         crynlk
 17591  224078      0      0  3     0x14200  bored         crypto
 38014  194793      0      0  3     0x14200  usbtsk        usbtask
 10734  170157      0      0  3     0x14200  usbatsk       usbatsk
 41674   59869      0      0  3  0x40014200  acpi0         acpi0
 27059   66073      0      0  7  0x40014200                idle11
 74635  434973      0      0  7  0x40014200                idle10
 36027  231269      0      0  7  0x40014200                idle9
   347  433094      0      0  7  0x40014200                idle8
 73086  477530      0      0  7  0x40014200                idle7
 67977   58218      0      0  7  0x40014200                idle6
 82891  185471      0      0  7  0x40014200                idle5
 84493  342954      0      0  7  0x40014200                idle4
 66503   89365      0      0  3  0x40014200                idle3
  3776  407212      0      0  3  0x40014200                idle2
 93247   71210      0      0  3  0x40014200                idle1
 84971  160128      0      0  3     0x14200  bored         sensors
*18225  438295      0      0  7     0x14200                softnet
 87145  102330      0      0  3     0x14200  bored         systqmp
  5600  399537      0      0  3     0x14200  bored         systq
 27906  252867      0      0  2  0x40014200                softclock
 71944  475702      0      0  7  0x40014200                idle0
 17443   68217      0      0  3     0x14200  bored         sbar
     1   72998      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb{1}>

ddb{1}> show all pools
Name      Size Requests Fail Releases Pgreq Pgrel Npage Hiwat Minpg
Maxpg Idle
arp         56      412    0      398     1     0     1     1     0
8    0
inpcbpl    280     1332    0     1324     1     0     1     1     0
8    0
plimitpl   152       31    0       14     1     0     1     1     0
8    0
myxmcl     12288      2    0        0     1     0     1     1     0
8    0
plcache    128      132    0        0     5     0     5     5     0
8    0
rtentry    112     1930    0     1858     3     0     3     3     0
8    0
syncache   264        1    0        1     1     1     0     1     0
8    0
tcpqe       32        1    0        1     1     1     0     1     0
8    0
tcpcb      544       17    0       12     1     0     1     1     0
8    0
nd6         48      140    0      134     1     0     1     1     0
8    0
art_heap8  4096       1    0        0     1     0     1     1     0
8    0
art_heap4  256     3025    0     2795    19     4    15    16     0
8    0
art_table   32     3026    0     2795     3     0     3     3     0
8    0
art_node    16     1494    0     1423     1     0     1     1     0
8    0
dirhash    1024      72    0       34     5     0     5     5     0
8    0
dino1pl    128     4555    0     1592   101     5    96    96     0
8    0
ffsino     272     4555    0     1592   208    10   198   198     0
8    0
nchpl      144     5750    0     1654   158     6   152   152     0
8    0
uvmvnodes   72     4571    0        0    84     0    84    84     0
8    0
vnodes     192     4571    0        0   229     0   229   229     0
8    0
namei      1024   39222    0    39222   360   359     1     2     0
8    1
percpumem   96       30    0        0     1     0     1     1     0
8    0
ehcixfer   264      181    0      174     1     0     1     1     0
8    0
scxspl     192    24103    0    24103   330   330     0     6     0
8    0
sigapl     432     3958    0     3934     4     0     4     4     0
8    0
knotepl    112       61    0       17     2     0     2     2     0
8    0
kqueuepl    56        8    0        0     1     0     1     1     0
8    0
pipepl     112     2360    0     2360   238   237     1     1     0
8    1
fdescpl    472     3959    0     3934     5     1     4     5     0
8    0
filepl     112    21601    0    21513     4     1     3     4     0
8    0
lockfpl     88        6    0        6     2     2     0     1     0
8    0
sessionpl  112       17    0        5     1     0     1     1     0
8    0
pgrppl      40       25    0       12     1     0     1     1     0
8    0
ucredpl     96       91    0       74     1     0     1     1     0
8    0
zombiepl   144     3934    0     3934   281   280     1     1     0
8    1
processpl  736     3987    0     3934     7     1     6     6     0
8    0
procpl     568     3987    0     3934     5     0     5     5     0
8    0
srpgc       64      598    0      598   162   161     1     1     0
8    1
sockpl     376     2158    0     2106     6     0     6     6     0
8    0
mcl9k      9216      34    0        0     3     0     3     3     0
8    0
mcl2k2     2112   61698    0        0  1302     0  1302  1302     0
8    0
mcl2k      2048      52    0        0     7     0     7     7     0
8    0
mbufpl     256    64115    0        0  1218     2  1216  1216     0
8    0
bufpl      256    23059    0     4686  1149     0  1149  1149     0
8    0
anonpl      16   302789    0   299099   162   142    20   162     0
2023    0
amapchunkpl 152    3916    0     3740    69    60     9    69     0
158    0
amappl16   192     3093    0     3078    13    12     1     8     0
8    0
amappl15   184      312    0      310     3     2     1     2     0
8    0
amappl14   176      731    0      728     4     3     1     3     0
8    0
amappl13   168     1021    0     1018     3     2     1     3     0
8    0
amappl12   160      466    0      465     4     3     1     4     0
8    0
amappl11   152     5159    0     5124     5     3     2     5     0
8    0
amappl10   144      986    0      985     6     5     1     5     0
8    0
amappl9    136      876    0      876   186   185     1     6     0
8    1
amappl8    128     5008    0     5004    55    53     2    33     0
8    0
amappl7    120      616    0      602     7     6     1     6     0
8    0
amappl6    112      992    0      969     7     6     1     6     0
8    0
amappl5    104     2746    0     2708     9     7     2     8     0
8    0
amappl4     96     2396    0     2340     9     6     3     9     0
8    0
amappl3     88     1316    0     1302    13    12     1    13     0
8    0
amappl2     80    24067    0    23860    22    15     7    22     0
8    0
amappl1     72   221288    0   220295   199   179    20   102     0
8    0
amappl      72     1506    0     1426    12    10     2    12     0
75    0
dma16384   16384      1    0        1     1     1     0     1     0
8    0
dma4096    4096       1    0        1     1     1     0     1     0
8    0
dma2048    2048       1    0        1     1     1     0     1     0
8    0
dma512     512      256    0      256     1     1     0     1     0
8    0
dma256     256        8    0        8     1     1     0     1     0
8    0
dma64       64       65    0       65     1     1     0     1     0
8    0
dma32       32        8    0        8     1     1     0     1     0
8    0
dma16       16        1    0        1     1     1     0     1     0
8    0
aobjpl      64        1    0        0     1     0     1     1     0
8    0
uaddrrnd    24     3959    0     3934     1     0     1     1     0
8    0
uaddrbest   32        2    0        0     1     0     1     1     0
8    0
uaddr       24     3959    0     3934     1     0     1     1     0
8    0
vmmpekpl   168    31498    0    31475     2     0     2     2     0
8    0
vmmpepl    168   456735    0   454392  1096   987   109   488     0
357    2
vmsppl     352     3958    0     3934     4     1     3     4     0
8    0
pdppl      4096    3958    0     3934     5     1     4     5     0
8    0
pvpl        32  1197438    0  1186618   394   298    96   386     0
265    0
pmappl     208     3958    0     3934     2     0     2     2     0
8    0
extentpl    40      289    0      190     1     0     1     1     0
8    0
phpool     112     2985    0      382    76     1    75    75     0
8    0
ddb{1}>


maybe irrelevant for this story but i tried

ddb{1}> mach ddbcpu 2
Stopped at      x86_ipi_db+0x5: popq    %rbp
x86_ipi_db(ffffffff81648ab5) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x5a152405c75250cc, count: 12
0x41cb8c419c524153:
ddb{2}> tr /up 0t413493
ffff800023c63ee0(ffffffff81b61278,0,0,0,44400080000,1) at 0xffff800023c63ee0
end(

and ddb freezes can only reboot box ..

Reply | Threaded
Open this post in threaded view
|

Re: panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"

Hrvoje Popovski
In reply to this post by Martin Pieuchot
On 7.12.2017. 12:59, Martin Pieuchot wrote:

> If believe em_init() should only be called if IFF_RUNNING is still set.
>
> If you agree on that we should generalize this fix.
>
> Index: if_em.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.c,v
> retrieving revision 1.336
> diff -u -p -r1.336 if_em.c
> --- if_em.c 25 Jul 2017 20:45:18 -0000 1.336
> +++ if_em.c 7 Dec 2017 11:56:19 -0000
> @@ -775,7 +775,8 @@ em_watchdog(struct ifnet *ifp)
>      sc->sc_tx_desc_head, sc->sc_tx_desc_tail,
>      E1000_READ_REG(&sc->hw, TDH), E1000_READ_REG(&sc->hw, TDT));
>  
> - em_init(sc);
> + if (ifp->if_flags & IFF_RUNNING)
> + em_init(sc);
>  
>   sc->watchdog_events++;
>  }
>
>

I can trigger panic with this diff.


panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"
failed: file "/usr/src/sys/dev/pci/if_em.c", line 1535
Stopped at      db_enter+0x5:   popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
  50843   8817      0         0x3          0    1  ifconfig
*338066  90412      0     0x14000      0x200    2K softnet
db_enter() at db_enter+0x5
panic() at panic+0x141
__assert(ffffffff81150c64,ffff800023bc47b0,0,ffff800000504000) at
__assert+0x24

em_stop(ffff800000504000,7) at em_stop+0x118
em_init(ffff800000504048) at em_init+0x25
em_watchdog(0) at em_watchdog+0xd1
if_watchdog_task(ffffffff812739a0) at if_watchdog_task+0xa4
taskq_thread(0) at taskq_thread+0x67
end trace frame: 0x0, count: 7
https://www.openbsd.org/ddb.html describes the minimum info required in
bug reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{2}>

ddb{2}> tr /up 0t50843
ffff800023cbddf0(ffffffff81b620f8,0,0,0,80000,1) at 0xffff800023cbddf0
end(uvm_fault(0xffffffff81bc3e20, 0xfffffffffffffff7, 0, 1) -> e
kernel: page fault trap, code=0
Faulted in DDB; continuing...

ddb{2}> tr /p 0t338066
em_devices(32202d206c6961,76615f78743e2d72,7874203d3c207367,65736e5f6d643e2d,70
616d000a212159,4c4554414944454d) at em_devices+0x31c
end of kernel
end trace frame: 0x66693e2d70666928, count: -1
ddb{2}>



ddb{2}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
  8817   50843  79736      0  7         0x3                ifconfig
 79736  119474  42105      0  3    0x10008b  pause         sh
 42105   75698  16540      0  3    0x10008b  pause         sh
 16540  320225      1      0  3    0x10008b  pause         ksh
 54705   58304      1      0  3    0x100083  ttyin         getty
 41445  395618      1      0  3    0x100083  ttyin         getty
 88810  517178      1      0  3    0x100083  ttyin         getty
 52690  519169      1      0  3    0x100083  ttyin         getty
  3983  159805      1      0  3    0x100083  ttyin         ksh
 47350  520409      1      0  3    0x100098  poll          cron
 58260  219793  77159     95  3    0x100092  kqread        smtpd
  2918   96352  77159    103  3    0x100092  kqread        smtpd
 59125  109489  77159     95  3    0x100092  kqread        smtpd
 91419  460387  77159     95  3    0x100092  kqread        smtpd
 95750  198350  77159     95  3    0x100092  kqread        smtpd
 48556  476067  77159     95  3    0x100092  kqread        smtpd
 77159  424577      1      0  3    0x100080  kqread        smtpd
 33691  342227      1      0  3        0x80  select        sshd
  9650  280076  46550     83  3    0x100092  poll          ntpd
 46550  214882  42207     83  3    0x100092  poll          ntpd
 42207  103275      1      0  3    0x100080  poll          ntpd
 27844  173279  23248     73  3    0x100010  ffs_fsync     syslogd
 23248  210379      1      0  3    0x100082  netio         syslogd
 69936  360819      0      0  3     0x14200  pgzero        zerothread
 16075  359676      0      0  3     0x14200  aiodoned      aiodoned
 49523   31272      0      0  3     0x14200  syncer        update
 10518   40696      0      0  3     0x14200  cleaner       cleaner
 99276  218425      0      0  3     0x14200  reaper        reaper
 35338   47452      0      0  3     0x14200  pgdaemon      pagedaemon
 51139  188699      0      0  3     0x14200  bored         crynlk
 39319  332427      0      0  3     0x14200  bored         crypto
 80252  394584      0      0  3     0x14200  usbtsk        usbtask
 48071   62837      0      0  3     0x14200  usbatsk       usbatsk
 32481  237123      0      0  3  0x40014200  acpi0         acpi0
 85846  447770      0      0  7  0x40014200                idle11
 81457  162415      0      0  7  0x40014200                idle10
 16117   94980      0      0  7  0x40014200                idle9
 18266  380698      0      0  7  0x40014200                idle8
 90780  430964      0      0  7  0x40014200                idle7
 10227  221054      0      0  7  0x40014200                idle6
 17559  128633      0      0  7  0x40014200                idle5
 65429  320888      0      0  7  0x40014200                idle4
 48145  270662      0      0  7  0x40014200                idle3
  6948  407710      0      0  3  0x40014200                idle2
 90544  325413      0      0  3  0x40014200                idle1
 94114  101917      0      0  3     0x14200  bored         sensors
*90412  338066      0      0  7     0x14200                softnet
 95449  344628      0      0  3     0x14200  bored         systqmp
 93814  150572      0      0  3     0x14200  bored         systq
 49337  366977      0      0  3  0x40014200  bored         softclock
 45367  148628      0      0  7  0x40014200                idle0
 88971  422009      0      0  3     0x14200  bored         sbar
     1   73555      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb{2}>