trap on octeon from ifconfig cnmac2 inet 192.168.1.10 netmask 255.255.255.0

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

trap on octeon from ifconfig cnmac2 inet 192.168.1.10 netmask 255.255.255.0

Lars Noodén
The hardware is an EdgeRouter PoE and the crash can be triggered
consistently with the following (or possibly any other ipv4 address)
line in ksh for cnmac2.  cnmac0 and cnmac1 seem unable to cause the
crash.  cnmac3 is not found.

# ifconfig cnmac2 inet 192.168.1.10 netmask 255.255.255.0

Trap cause = 13 Frame 0x980000000b24b9d8
Trap PC 0xffffffff8124d504 RA 0xffffffff8124d434 fault 0xc000000000014cc8
0xffffffff8124d4e8 (6,9001180008001060,60,e)  ra 0xffffffff8124d434 sp
0x980000000b24bb30, sz 0
0xffffffff8124d3e8 (6,9001180008001060,60,e)  ra 0xffffffff8124d600 sp
0x980000000b24bb30, sz 48
0xffffffff8124d5f0 (6,9001180008001060,60,e)  ra 0x0 sp 0x980000000b24bb60, sz 0
User-level: pid 14919
stopped on non ddb fault
Stopped at      0xffffffff8124d504:     teq     v1,zero
ddb>

ddb> trace
0xffffffff8124d4e8 (6,9001180008001060,60,e)  ra 0xffffffff8124d434 sp 0x980000
000b24bb30, sz 0
0xffffffff8124d3e8 (6,9001180008001060,60,e)  ra 0xffffffff8124d600 sp 0x980000
000b24bb30, sz 48
0xffffffff8124d5f0 (6,9001180008001060,60,e)  ra 0x0 sp 0x980000000b24bb60, sz 0
[0] 0:ksh*                                            "edge.lan" 19:59 08-Nov-17
User-level: pid 14919
ddb>

ddb> show panic
trap
ddb>

ddb> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
*14919  471017   1184      0  7         0x3                ifconfig
  1184  128132  86995      0  3    0x10008b  pause         ksh
 86995  225704      1      0  3    0x100080  kqread        tmux
 42641  445367  35799      0  3    0x100083  kqread        tmux
 35799  506834      1      0  3    0x10008b  pause         ksh
 81044  349952      1      0  3    0x100098  poll          cron
 72988  136191      1     99  3    0x100090  poll          sndiod
 76655  177176      1    110  3    0x100090  poll          sndiod
 20919  306529  96836     95  3    0x100092  kqread        smtpd
 74960   98935  96836    103  3    0x100092  kqread        smtpd
 13261  347566  96836     95  3    0x100092  kqread        smtpd
 34794  236112  96836     95  3    0x100092  kqread        smtpd
  7462  409042  96836     95  3    0x100092  kqread        smtpd
 49117  343017  96836     95  3    0x100092  kqread        smtpd
 96836  514241      1      0  3    0x100080  kqread        smtpd
 80246  329859      1      0  3        0x80  select        sshd
 43052   86923  29771     83  3    0x100092  poll          ntpd
 29771  396057  51974     83  3    0x100092  poll          ntpd
 51974  347217      1      0  3    0x100080  poll          ntpd
 83372  395857   3018     74  3    0x100092  bpf           pflogd
  3018  366968      1      0  3        0x80  netio         pflogd
 84944  252832  17743     73  2    0x100090                syslogd
 17743  286457      1      0  3    0x100082  netio         syslogd
 20452  479235      1     77  3    0x100090  poll          dhclient
  5305  401927      1      0  3        0x80  poll          dhclient
 57467  477863  64586    115  3    0x100092  kqread        slaacd
 73660  398139  64586    115  3    0x100092  kqread        slaacd
 64586  441031      1      0  3        0x80  kqread        slaacd
 41127    1978      0      0  2     0x14200                zerothread
 46039  440114      0      0  3     0x14200  aiodoned      aiodoned
 14076  109833      0      0  3     0x14200  syncer        update
   570   48474      0      0  3     0x14200  cleaner       cleaner
 44029   16532      0      0  3     0x14200  reaper        reaper
 42664  472885      0      0  3     0x14200  pgdaemon      pagedaemon
 80298   65124      0      0  3     0x14200  bored         crynlk
  2332  290456      0      0  3     0x14200  bored         crypto
 39853   44489      0      0  3     0x14200  usbtsk        usbtask
 37547  238835      0      0  3     0x14200  usbatsk       usbatsk
 15796  159207      0      0  3     0x14200  bored         dwc2
 24973  201745      0      0  3     0x14200  bored         softnet
 93841  329573      0      0  3     0x14200  bored         systqmp
 31395  115933      0      0  3     0x14200  bored         systq
 51472  124265      0      0  3  0x40014200  bored         softclock
 85029  366783      0      0  3  0x40014200                idle0
     1  496774      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb>

# cat /var/run/dmesg.boot
Copyright (c) 1982, 1986, 1989, 1991, 1993
        The Regents of the University of California.  All rights reserved.
Copyright (c) 1995-2017 OpenBSD. All rights reserved.  https://www.OpenBSD.org

OpenBSD 6.2-current (GENERIC) #0: Tue Nov  7 21:35:15 UTC 2017
    visa@octeon:/usr/src/sys/arch/octeon/compile/GENERIC
real mem = 536870912 (512MB)
avail mem = 524025856 (499MB)
mainbus0 at root
cpu0 at mainbus0: CN50xx CPU rev 0.1 500 MHz, Software FP emulation
cpu0: cache L1-I 32KB 4 way D 16KB 64 way, L2 128KB 8 way
clock0 at mainbus0: int 5
iobus0 at mainbus0
simplebus0 at iobus0: "soc"
octciu0 at simplebus0
cn30xxsmi0 at simplebus0
com0 at simplebus0: ns16550a, 64 byte fifo
com0: console
dwctwo0 at iobus0 base 0x1180068000000 irq 56
usb0 at dwctwo0: USB revision 2.0
uhub0 at usb0 configuration 1 interface 0 "Octeon DWC2 root hub" rev
2.00/1.00 addr 1
octrng0 at iobus0 base 0x1400000000000 irq 0
cn30xxgmx0 at iobus0 base 0x1180008000000
cnmac0 at cn30xxgmx0: RGMII, address 80:2a:a8:8e:2d:4f
atphy0 at cnmac0 phy 7: AR8035 10/100/1000 PHY, rev. 2
cnmac1 at cn30xxgmx0: RGMII, address 80:2a:a8:8e:2d:50
atphy1 at cnmac1 phy 6: AR8035 10/100/1000 PHY, rev. 2
cnmac2 at cn30xxgmx0: RGMII, address 80:2a:a8:8e:2d:51
/dev/ksyms: Symbol table not valid.
umass0 at uhub0 port 1 configuration 1 interface 0 "Kingston
DataTraveler 2.0" rev 2.00/1.00 addr 2
umass0: using SCSI over Bulk-Only
scsibus0 at umass0: 2 targets, initiator 0
sd0 at scsibus0 targ 1 lun 0: <Kingston, DataTraveler 2.0, PMAP> SCSI2
0/direct removable serial.09511665BD50BA24009C
sd0: 7498MB, 512 bytes/sector, 15356160 sectors
vscsi0 at root
scsibus1 at vscsi0: 256 targets
softraid0 at root
scsibus2 at softraid0: 256 targets
boot device: sd0
root on sd0a (e858023e9e34a57a.a) swap on sd0b dump on sd0b
WARNING: / was not properly unmounted
WARNING: No TOD clock, believing file system.
WARNING: CHECK AND RESET THE DATE!

Reply | Threaded
Open this post in threaded view
|

Re: trap on octeon from ifconfig cnmac2 inet 192.168.1.10 netmask 255.255.255.0

Bryan Vyhmeister-3
On Wed, Nov 08, 2017 at 08:53:34PM +0200, Lars Noodén wrote:
> The hardware is an EdgeRouter PoE and the crash can be triggered
> consistently with the following (or possibly any other ipv4 address)
> line in ksh for cnmac2.  cnmac0 and cnmac1 seem unable to cause the
> crash.  cnmac3 is not found.

That is likely because the switch that drives the last three ports on
the EdgeRouter PoE is not supported. I have never been able to get
cnmac2 to work on the EdgeRouter PoE in any capacity. No one is
currently working on switch support as far as I know. The EdgeRouter
Lite does not have a switch so does not show this issue and cnmac2
functions on that model.

Bryan

Reply | Threaded
Open this post in threaded view
|

Re: trap on octeon from ifconfig cnmac2 inet 192.168.1.10 netmask 255.255.255.0

Stefan Sperling-5
In reply to this post by Lars Noodén
On Wed, Nov 08, 2017 at 08:53:34PM +0200, Lars Noodén wrote:
> The hardware is an EdgeRouter PoE and the crash can be triggered
> consistently with the following (or possibly any other ipv4 address)
> line in ksh for cnmac2.  cnmac0 and cnmac1 seem unable to cause the
> crash.  cnmac3 is not found.

Below I am quoting a message I got from visa@ back in June when I
reported the same problem in private.

I don't have access to an EdgeRouter PoE anymore so I could never
test these diffs. Could you test them?

visa@ wrote to me:

On Thu, Jun 08, 2017 at 01:03:41PM +0200, Stefan Sperling wrote:
> Hi Visa,
>
> On an EdgeRouter PoE I am seeing an issue where the cnmac2 interface does
> not attach a PHY, and 'ifconfig cnmac2 up' crashes the box.
>
> This happens with both 6.1 and -current (see below).
>
> Is this expected?

Yes, the internal port connected to the builtin switch does not work.

> Is it just that the PHY is not supported yet?

Very likely a new driver is needed. Another question is how the control
interface of the switch chip is connected to the main SoC.
The MDIO register that cnmac(4) tries to use is clearly wrong as no PHY
is found. Such a register is not even mentioned in the ubnt_e101.dts
file in Ubiquiti's GPL distribution.

> # sh /etc/netstart cnmac2
>
> Trap cause = 13 Frame 0x980000000e5eba18
> Trap PC 0xffffffff813d5a9c RA 0xffffffff813d59cc fault 0x22898e75d0
> 0xffffffff813d5a80 (6,9001180008001060,60,e)  ra 0xffffffff813d59cc sp 0x980000000e5ebb70, sz 0
> 0xffffffff813d5980 (6,9001180008001060,60,e)  ra 0xffffffff813d5b98 sp 0x980000000e5ebb70, sz 48
> 0xffffffff813d5b88 (6,9001180008001060,60,e)  ra 0x0 sp 0x980000000e5ebba0, sz 0
> User-level: pid 33307
> stopped on non ddb fault
> Stopped at      0xffffffff813d5a9c:     teq     v1,zero

This could happen because of a division by zero in
cn30xxgmx_rgmii_speed(). Could you try the (ugly) fix below?
A prebuilt kernel with the fix is in cvs:~visa/bsd.speedfix .

Index: arch/octeon/dev/cn30xxgmx.c
===================================================================
RCS file: src/sys/arch/octeon/dev/cn30xxgmx.c,v
retrieving revision 1.34
diff -u -p -r1.34 cn30xxgmx.c
--- arch/octeon/dev/cn30xxgmx.c 2 May 2017 13:26:49 -0000 1.34
+++ arch/octeon/dev/cn30xxgmx.c 8 Jun 2017 14:36:57 -0000
@@ -853,10 +853,8 @@ cn30xxgmx_rgmii_speed(struct cn30xxgmx_p
  baudrate = IF_Mbps(100);
  break;
  case RXN_RX_INBND_SPEED_125:
- baudrate = IF_Gbps(1);
- break;
  default:
- baudrate = 0/* XXX */;
+ baudrate = IF_Gbps(1);
  break;
  }
  ifp->if_baudrate = baudrate;


Because the switch port is not usable anyway, perhaps device probing
should skip it. Does the following patch work correctly on the router?
cvs:~visa/bsd.probefix is a kernel with the patch.

I do not have an ERPoe-5 myself.

Index: arch/octeon/dev/cn30xxsmi.c
===================================================================
RCS file: src/sys/arch/octeon/dev/cn30xxsmi.c,v
retrieving revision 1.4
diff -u -p -r1.4 cn30xxsmi.c
--- arch/octeon/dev/cn30xxsmi.c 2 May 2017 13:26:49 -0000 1.4
+++ arch/octeon/dev/cn30xxsmi.c 8 Jun 2017 14:43:36 -0000
@@ -181,6 +181,11 @@ cn30xxsmi_get_phy(int phandle, int port,
 
  switch (octeon_boot_info->board_type) {
  case BOARD_TYPE_UBIQUITI_E100:
+ /* XXX Skip the switch port on ERPoe-5.
+ * XXX There is no driver for it. */
+ if (port > 1 && octeon_boot_info->board_rev_major == 1)
+ return ENOENT;
+
  if (port > 2)
  return ENOENT;
  reg = 7 - port;

Reply | Threaded
Open this post in threaded view
|

Re: trap on octeon from ifconfig cnmac2 inet 192.168.1.10 netmask 255.255.255.0

Bryan Vyhmeister-3
On Wed, Nov 08, 2017 at 10:05:38PM +0100, Stefan Sperling wrote:

> Because the switch port is not usable anyway, perhaps device probing
> should skip it. Does the following patch work correctly on the router?
> cvs:~visa/bsd.probefix is a kernel with the patch.
>
> I do not have an ERPoe-5 myself.
>
> Index: arch/octeon/dev/cn30xxsmi.c
> ===================================================================
> RCS file: src/sys/arch/octeon/dev/cn30xxsmi.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 cn30xxsmi.c
> --- arch/octeon/dev/cn30xxsmi.c 2 May 2017 13:26:49 -0000 1.4
> +++ arch/octeon/dev/cn30xxsmi.c 8 Jun 2017 14:43:36 -0000
> @@ -181,6 +181,11 @@ cn30xxsmi_get_phy(int phandle, int port,
>  
>   switch (octeon_boot_info->board_type) {
>   case BOARD_TYPE_UBIQUITI_E100:
> + /* XXX Skip the switch port on ERPoe-5.
> + * XXX There is no driver for it. */
> + if (port > 1 && octeon_boot_info->board_rev_major == 1)
> + return ENOENT;
> +
>   if (port > 2)
>   return ENOENT;
>   reg = 7 - port;
>

I can confirm that this second patch does skip the switch so no cnmac2
shows up on the ER PoE. That is a big improvement since it avoids the
confusion. Would you like the first patch to be tested also?

Bryan