un-KERNEL_LOCK() TCP/UDP input & co

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
7 messages Options
Reply | Threaded
Open this post in threaded view
|

un-KERNEL_LOCK() TCP/UDP input & co

Martin Pieuchot
After auditing all the pr_input() functions, the only missing bits
for taking them out of the KERNEL_LOCK() are: ``etheripstat''.  I
leave such counter conversions for somebody else (8

In the meantime I annotated globals used in these functions.  Most
of the pseudo-interfaces have a global list that is currently protected
by the NET_LOCK().

PCB tables are also currently protected by the NET_LOCK().

carp(4) and IGMP inputs need some love, so they grab the KERNEL_LOCK()
for now.

inet{,6}ctlerrmap are read-only so they get turned into 'const'.

With this diff in we should be able to have a mostly KERNEL_LOCK()-free
softnet taskq.

ok?

Index: net/if_etherip.c
===================================================================
RCS file: /cvs/src/sys/net/if_etherip.c,v
retrieving revision 1.21
diff -u -p -r1.21 if_etherip.c
--- net/if_etherip.c 25 Oct 2017 09:24:09 -0000 1.21
+++ net/if_etherip.c 8 Nov 2017 14:41:29 -0000
@@ -434,6 +434,7 @@ ip_etherip_input(struct mbuf **mp, int *
  return IPPROTO_DONE;
  }
 
+ NET_ASSERT_LOCKED();
  LIST_FOREACH(sc, &etherip_softc_list, sc_entry) {
  if (sc->sc_src.ss_family != AF_INET ||
     sc->sc_dst.ss_family != AF_INET)
@@ -598,6 +599,7 @@ ip6_etherip_input(struct mbuf **mp, int
  in6_recoverscope(&ipsrc, &ip6->ip6_src);
  in6_recoverscope(&ipdst, &ip6->ip6_dst);
 
+ NET_ASSERT_LOCKED();
  LIST_FOREACH(sc, &etherip_softc_list, sc_entry) {
  if (sc->sc_src.ss_family != AF_INET6 ||
     sc->sc_dst.ss_family != AF_INET6)
Index: net/if_gif.c
===================================================================
RCS file: /cvs/src/sys/net/if_gif.c,v
retrieving revision 1.101
diff -u -p -r1.101 if_gif.c
--- net/if_gif.c 25 Oct 2017 09:24:09 -0000 1.101
+++ net/if_gif.c 8 Nov 2017 14:40:26 -0000
@@ -622,6 +622,7 @@ in_gif_input(struct mbuf **mp, int *offp
 
  ip = mtod(m, struct ip *);
 
+ NET_ASSERT_LOCKED();
  /* this code will be soon improved. */
  LIST_FOREACH(sc, &gif_softc_list, gif_list) {
  if (sc->gif_psrc == NULL || sc->gif_pdst == NULL ||
@@ -730,7 +731,8 @@ in6_gif_output(struct ifnet *ifp, int fa
  return 0;
 }
 
-int in6_gif_input(struct mbuf **mp, int *offp, int proto, int af)
+int
+in6_gif_input(struct mbuf **mp, int *offp, int proto, int af)
 {
  struct mbuf *m = *mp;
  struct gif_softc *sc;
@@ -747,6 +749,7 @@ int in6_gif_input(struct mbuf **mp, int
  in6_recoverscope(&src, &ip6->ip6_src);
  in6_recoverscope(&dst, &ip6->ip6_dst);
 
+ NET_ASSERT_LOCKED();
  LIST_FOREACH(sc, &gif_softc_list, gif_list) {
  if (sc->gif_psrc == NULL || sc->gif_pdst == NULL ||
     sc->gif_psrc->sa_family != AF_INET6 ||
Index: net/if_pfsync.c
===================================================================
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.254
diff -u -p -r1.254 if_pfsync.c
--- net/if_pfsync.c 11 Aug 2017 21:24:19 -0000 1.254
+++ net/if_pfsync.c 8 Nov 2017 14:42:49 -0000
@@ -659,6 +659,8 @@ pfsync_input(struct mbuf **mp, int *offp
  int offset, noff, len, count, mlen, flags = 0;
  int e;
 
+ NET_ASSERT_LOCKED();
+
  pfsyncstat_inc(pfsyncs_ipackets);
 
  /* verify that we have a sync interface configured */
Index: net/if_vxlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.63
diff -u -p -r1.63 if_vxlan.c
--- net/if_vxlan.c 25 Oct 2017 09:24:09 -0000 1.63
+++ net/if_vxlan.c 8 Nov 2017 14:49:58 -0000
@@ -611,6 +611,7 @@ vxlan_lookup(struct mbuf *m, struct udph
  vni = VXLAN_VNI_UNSET;
  }
 
+ NET_ASSERT_LOCKED();
  /* First search for a vxlan(4) interface with the packet's VNI */
  LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) {
  if ((uh->uh_dport == sc->sc_dstport) &&
Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1043
diff -u -p -r1.1043 pf.c
--- net/pf.c 31 Oct 2017 22:05:12 -0000 1.1043
+++ net/pf.c 8 Nov 2017 15:03:35 -0000
@@ -3184,12 +3184,14 @@ pf_socket_lookup(struct pf_pdesc *pd)
  sport = pd->hdr.tcp.th_sport;
  dport = pd->hdr.tcp.th_dport;
  PF_ASSERT_LOCKED();
+ NET_ASSERT_LOCKED();
  tb = &tcbtable;
  break;
  case IPPROTO_UDP:
  sport = pd->hdr.udp.uh_sport;
  dport = pd->hdr.udp.uh_dport;
  PF_ASSERT_LOCKED();
+ NET_ASSERT_LOCKED();
  tb = &udbtable;
  break;
  default:
Index: net/pipex.c
===================================================================
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.105
diff -u -p -r1.105 pipex.c
--- net/pipex.c 11 Aug 2017 21:24:19 -0000 1.105
+++ net/pipex.c 8 Nov 2017 15:08:06 -0000
@@ -637,6 +637,7 @@ pipex_lookup_by_session_id(int protocol,
  struct pipex_hash_head *list;
  struct pipex_session *session;
 
+ NET_ASSERT_LOCKED();
  list = PIPEX_ID_HASHTABLE(session_id);
  LIST_FOREACH(session, list, id_chain) {
  if (session->protocol == protocol &&
Index: netinet/igmp.c
===================================================================
RCS file: /cvs/src/sys/netinet/igmp.c,v
retrieving revision 1.71
diff -u -p -r1.71 igmp.c
--- netinet/igmp.c 29 Oct 2017 14:56:36 -0000 1.71
+++ netinet/igmp.c 8 Nov 2017 14:37:06 -0000
@@ -177,6 +177,7 @@ rti_find(struct ifnet *ifp)
 {
  struct router_info *rti;
 
+ KERNEL_ASSERT_LOCKED();
  for (rti = rti_head; rti != 0; rti = rti->rti_next) {
  if (rti->rti_ifidx == ifp->if_index)
  return (rti);
@@ -221,7 +222,9 @@ igmp_input(struct mbuf **mp, int *offp,
  return IPPROTO_DONE;
  }
 
+ KERNEL_LOCK();
  proto = igmp_input_if(ifp, mp, offp, proto, af);
+ KERNEL_UNLOCK();
  if_put(ifp);
  return proto;
 }
Index: netinet/in.h
===================================================================
RCS file: /cvs/src/sys/netinet/in.h,v
retrieving revision 1.125
diff -u -p -r1.125 in.h
--- netinet/in.h 6 Oct 2017 21:14:55 -0000 1.125
+++ netinet/in.h 8 Nov 2017 15:14:23 -0000
@@ -794,7 +794,7 @@ __END_DECLS
 #endif /* !_KERNEL */
 
 #ifdef _KERNEL
-extern   int inetctlerrmap[];
+extern   const int inetctlerrmap[];
 extern   struct in_addr zeroin_addr;
 
 struct mbuf;
Index: netinet/ip_carp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.317
diff -u -p -r1.317 ip_carp.c
--- netinet/ip_carp.c 16 Oct 2017 13:20:20 -0000 1.317
+++ netinet/ip_carp.c 8 Nov 2017 14:30:24 -0000
@@ -505,7 +505,9 @@ carp_proto_input_if(struct ifnet *ifp, s
  }
  m->m_data -= iplen;
 
+ KERNEL_LOCK();
  carp_proto_input_c(ifp, m, ch, ismulti, AF_INET);
+ KERNEL_UNLOCK();
  return IPPROTO_DONE;
 }
 
@@ -580,7 +582,9 @@ carp6_proto_input_if(struct ifnet *ifp,
  }
  m->m_data -= *offp;
 
+ KERNEL_LOCK();
  carp_proto_input_c(ifp, m, ch, 1, AF_INET6);
+ KERNEL_UNLOCK();
  return IPPROTO_DONE;
 }
 #endif /* INET6 */
@@ -1514,7 +1518,7 @@ carp_lsdrop(struct mbuf *m, sa_family_t
  m_tag_delete(m, mtag);
  m->m_flags &= ~M_MCAST;
  }
-
+
  /*
  * Return without making a drop decision. This allows to clear the
  * M_MCAST flag and do nothing else.
Index: netinet/ip_ether.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_ether.c,v
retrieving revision 1.88
diff -u -p -r1.88 ip_ether.c
--- netinet/ip_ether.c 6 Nov 2017 15:12:43 -0000 1.88
+++ netinet/ip_ether.c 8 Nov 2017 14:24:56 -0000
@@ -296,6 +296,7 @@ etherip_getgif(struct mbuf *m)
  return NULL;
  }
 
+ NET_ASSERT_LOCKED();
  /* Find appropriate gif(4) interface */
  LIST_FOREACH(sc, &gif_softc_list, gif_list) {
  if ((sc->gif_psrc == NULL) ||
Index: netinet/ip_gre.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_gre.c,v
retrieving revision 1.67
diff -u -p -r1.67 ip_gre.c
--- netinet/ip_gre.c 9 Oct 2017 08:35:38 -0000 1.67
+++ netinet/ip_gre.c 8 Nov 2017 14:28:25 -0000
@@ -356,6 +356,7 @@ gre_lookup(struct mbuf *m, u_int8_t prot
  struct ip *ip = mtod(m, struct ip *);
  struct gre_softc *sc;
 
+ NET_ASSERT_LOCKED();
  LIST_FOREACH(sc, &gre_softc_list, sc_list) {
  if ((sc->g_dst.s_addr == ip->ip_src.s_addr) &&
     (sc->g_src.s_addr == ip->ip_dst.s_addr) &&
Index: netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.329
diff -u -p -r1.329 ip_input.c
--- netinet/ip_input.c 5 Nov 2017 13:19:59 -0000 1.329
+++ netinet/ip_input.c 8 Nov 2017 15:14:10 -0000
@@ -1402,7 +1402,7 @@ ip_stripoptions(struct mbuf *m)
  ip->ip_len = htons(ntohs(ip->ip_len) - olen);
 }
 
-int inetctlerrmap[PRC_NCMDS] = {
+const int inetctlerrmap[PRC_NCMDS] = {
  0, 0, 0, 0,
  0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
  EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
Index: netinet/ipsec_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ipsec_input.c,v
retrieving revision 1.158
diff -u -p -r1.158 ipsec_input.c
--- netinet/ipsec_input.c 6 Nov 2017 15:12:43 -0000 1.158
+++ netinet/ipsec_input.c 8 Nov 2017 15:17:25 -0000
@@ -779,6 +779,7 @@ ipsec_common_ctlinput(u_int rdomain, int
  return;
 
  /* Walk the chain backwards to the first tdb */
+ NET_ASSERT_LOCKED();
  for (; tdbp; tdbp = tdbp->tdb_inext) {
  if (tdbp->tdb_flags & TDBF_INVALID ||
     (adjust = ipsec_hdrsz(tdbp)) == -1)
Index: netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.105
diff -u -p -r1.105 raw_ip.c
--- netinet/raw_ip.c 2 Nov 2017 14:01:18 -0000 1.105
+++ netinet/raw_ip.c 8 Nov 2017 15:02:19 -0000
@@ -128,6 +128,7 @@ rip_input(struct mbuf **mp, int *offp, i
  KASSERT(af == AF_INET);
 
  ripsrc.sin_addr = ip->ip_src;
+ NET_ASSERT_LOCKED();
  TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
  if (inp->inp_socket->so_state & SS_CANTRCVMORE)
  continue;
@@ -512,10 +513,11 @@ rip_attach(struct socket *so, int proto)
  if (proto < 0 || proto >= IPPROTO_MAX)
  return EPROTONOSUPPORT;
 
- if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
-    (error = in_pcballoc(so, &rawcbtable))) {
+ if ((error = soreserve(so, rip_sendspace, rip_recvspace)))
+ return error;
+ NET_ASSERT_LOCKED();
+ if ((error = in_pcballoc(so, &rawcbtable)))
  return error;
- }
  inp = sotoinpcb(so);
  inp->inp_ip.ip_p = proto;
  return 0;
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.350
diff -u -p -r1.350 tcp_input.c
--- netinet/tcp_input.c 25 Oct 2017 12:38:21 -0000 1.350
+++ netinet/tcp_input.c 8 Nov 2017 14:54:07 -0000
@@ -579,6 +579,7 @@ findpcb:
  }
  KASSERT(sotoinpcb(inp->inp_socket) == inp);
  KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp);
+ soassertlocked(inp->inp_socket);
 
  /* Check the minimum TTL for socket. */
  switch (af) {
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.159
diff -u -p -r1.159 tcp_usrreq.c
--- netinet/tcp_usrreq.c 2 Nov 2017 14:01:18 -0000 1.159
+++ netinet/tcp_usrreq.c 8 Nov 2017 15:21:16 -0000
@@ -576,6 +576,7 @@ tcp_attach(struct socket *so, int proto)
  return (error);
  }
 
+ NET_ASSERT_LOCKED();
  error = in_pcballoc(so, &tcbtable);
  if (error)
  return (error);
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.242
diff -u -p -r1.242 udp_usrreq.c
--- netinet/udp_usrreq.c 2 Nov 2017 14:01:18 -0000 1.242
+++ netinet/udp_usrreq.c 8 Nov 2017 15:04:19 -0000
@@ -383,6 +383,7 @@ udp_input(struct mbuf **mp, int *offp, i
  * (Algorithm copied from raw_intr().)
  */
  last = NULL;
+ NET_ASSERT_LOCKED();
  TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
  if (inp->inp_socket->so_state & SS_CANTRCVMORE)
  continue;
@@ -564,6 +565,7 @@ udp_input(struct mbuf **mp, int *offp, i
  }
  }
  KASSERT(sotoinpcb(inp->inp_socket) == inp);
+ soassertlocked(inp->inp_socket);
 
 #ifdef INET6
  if (ip6 && inp->inp_ip6_minhlim &&
@@ -1254,8 +1256,11 @@ udp_attach(struct socket *so, int proto)
  if (so->so_pcb != NULL)
  return EINVAL;
 
- if ((error = soreserve(so, udp_sendspace, udp_recvspace)) ||
-    (error = in_pcballoc(so, &udbtable)))
+ if ((error = soreserve(so, udp_sendspace, udp_recvspace)))
+ return error;
+
+ NET_ASSERT_LOCKED();
+ if ((error = in_pcballoc(so, &udbtable)))
  return error;
 #ifdef INET6
  if (sotoinpcb(so)->inp_flags & INP_IPV6)
Index: netinet6/in6.h
===================================================================
RCS file: /cvs/src/sys/netinet6/in6.h,v
retrieving revision 1.99
diff -u -p -r1.99 in6.h
--- netinet6/in6.h 1 Sep 2017 16:48:27 -0000 1.99
+++ netinet6/in6.h 8 Nov 2017 15:18:51 -0000
@@ -404,7 +404,7 @@ typedef __socklen_t socklen_t; /* length
 #endif /* __BSD_VISIBLE */
 
 #ifdef _KERNEL
-extern u_char inet6ctlerrmap[];
+extern const u_char inet6ctlerrmap[];
 extern struct in6_addr zeroin6_addr;
 
 struct mbuf;
Index: netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.207
diff -u -p -r1.207 ip6_input.c
--- netinet6/ip6_input.c 1 Nov 2017 06:35:38 -0000 1.207
+++ netinet6/ip6_input.c 8 Nov 2017 15:19:09 -0000
@@ -1344,7 +1344,7 @@ ip6_lasthdr(struct mbuf *m, int off, int
  * System control for IP6
  */
 
-u_char inet6ctlerrmap[PRC_NCMDS] = {
+const u_char inet6ctlerrmap[PRC_NCMDS] = {
  0, 0, 0, 0,
  0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
  EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
Index: netinet6/raw_ip6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.122
diff -u -p -r1.122 raw_ip6.c
--- netinet6/raw_ip6.c 2 Nov 2017 14:01:18 -0000 1.122
+++ netinet6/raw_ip6.c 8 Nov 2017 15:02:04 -0000
@@ -136,6 +136,7 @@ rip6_input(struct mbuf **mp, int *offp,
  /* KAME hack: recover scopeid */
  in6_recoverscope(&rip6src, &ip6->ip6_src);
 
+ NET_ASSERT_LOCKED();
  TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) {
  if (in6p->inp_socket->so_state & SS_CANTRCVMORE)
  continue;
@@ -695,8 +696,10 @@ rip6_attach(struct socket *so, int proto
  if (proto < 0 || proto >= IPPROTO_MAX)
  return EPROTONOSUPPORT;
 
- if ((error = soreserve(so, rip6_sendspace, rip6_recvspace)) ||
-    (error = in_pcballoc(so, &rawin6pcbtable)))
+ if ((error = soreserve(so, rip6_sendspace, rip6_recvspace)))
+ return error;
+ NET_ASSERT_LOCKED();
+ if ((error = in_pcballoc(so, &rawin6pcbtable)))
  return error;
 
  in6p = sotoinpcb(so);

Reply | Threaded
Open this post in threaded view
|

Re: un-KERNEL_LOCK() TCP/UDP input & co

Alexandr Nedvedicky
Hello,

your change looks good to me. Though I have a comment/question to your diff.


> Index: net/if_vxlan.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_vxlan.c,v
> retrieving revision 1.63
> diff -u -p -r1.63 if_vxlan.c
> --- net/if_vxlan.c 25 Oct 2017 09:24:09 -0000 1.63
> +++ net/if_vxlan.c 8 Nov 2017 14:49:58 -0000
> @@ -611,6 +611,7 @@ vxlan_lookup(struct mbuf *m, struct udph
>   vni = VXLAN_VNI_UNSET;
>   }
>  
> + NET_ASSERT_LOCKED();
>   /* First search for a vxlan(4) interface with the packet's VNI */
>   LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) {
>   if ((uh->uh_dport == sc->sc_dstport) &&
> Index: net/pf.c

    I think we are approaching point, which requires us to revisit
    NET_ASSERT_LOCKED(). The assert currently tests caller is writer
    on net_lock.

    Since we are going to 'soften' the NET_LOCK() to R-lock for
    some tasks/threads the NET_ASSERT_LOCKED() will become invalid
    (false positive) assertion for functions, which are being grabbed
    occasionally as readers and other times as writers (?typically in
    local outbound path?). I've seen such smoke already in my diffs
    I'm working on currently.

    So I'd like to know, what's the plan for NET_ASSERT_LOCKED() macro?

        a) are we going to relax it, so it will test if the net_lock is
        locked?

        b) are we going to keep it, but a new 'soft' assert will be introduced
        e.g. NET_ASSERT_ALOCKED() (A for any lock)?

        c) add parameter to current NET_ASSERT_LOCKED() stating desired
        lock level: 0 - unlocked, 1 - reader, 2 - writer

    I'm leaning towards option b) introduce a new assertion for cases
    where we require lock, but we don't care if it is reader/writer.

As I've said the patch looks good to me as-is and should go in. I just
would like to hear some greater plan for NET_ASSERT_LOCKED(). Perhaps
we should go for it before we will be further playing with parallel
softnet.

thanks a lot
regards
sasha

Reply | Threaded
Open this post in threaded view
|

Re: un-KERNEL_LOCK() TCP/UDP input & co

Martin Pieuchot
On 13/11/17(Mon) 12:14, Alexandr Nedvedicky wrote:

> your change looks good to me. Though I have a comment/question to your diff.
>
> > Index: net/if_vxlan.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_vxlan.c,v
> > retrieving revision 1.63
> > diff -u -p -r1.63 if_vxlan.c
> > --- net/if_vxlan.c 25 Oct 2017 09:24:09 -0000 1.63
> > +++ net/if_vxlan.c 8 Nov 2017 14:49:58 -0000
> > @@ -611,6 +611,7 @@ vxlan_lookup(struct mbuf *m, struct udph
> >   vni = VXLAN_VNI_UNSET;
> >   }
> >  
> > + NET_ASSERT_LOCKED();
> >   /* First search for a vxlan(4) interface with the packet's VNI */
> >   LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) {
> >   if ((uh->uh_dport == sc->sc_dstport) &&
> > Index: net/pf.c
>
>     I think we are approaching point, which requires us to revisit
>     NET_ASSERT_LOCKED(). The assert currently tests caller is writer
>     on net_lock.

Since last week NET_ASSERT_LOCKED() is checking if the calling thread
owns the (write) lock or if it is hold by at least one reader.

Without changing our rwlock implementation there's no way to check if
the calling thread is holding a read lock.  That would be definitively
useful.

>     Since we are going to 'soften' the NET_LOCK() to R-lock for
>     some tasks/threads the NET_ASSERT_LOCKED() will become invalid
>     (false positive) assertion for functions, which are being grabbed
>     occasionally as readers and other times as writers (?typically in
>     local outbound path?). I've seen such smoke already in my diffs
>     I'm working on currently.
>
>     So I'd like to know, what's the plan for NET_ASSERT_LOCKED() macro?
>
> a) are we going to relax it, so it will test if the net_lock is
> locked?

Yep, that's already done.

> b) are we going to keep it, but a new 'soft' assert will be introduced
> e.g. NET_ASSERT_ALOCKED() (A for any lock)?

We should turn NET_ASSERT_LOCKED() macros that require a write lock to
NET_ASSERT_WLOCKED().  But that's mostly in the ioctl(2) path.

> c) add parameter to current NET_ASSERT_LOCKED() stating desired
> lock level: 0 - unlocked, 1 - reader, 2 - writer

I don't care how the API looks like, I'd just argue for coherency
between all or locks.

> As I've said the patch looks good to me as-is and should go in. I just
> would like to hear some greater plan for NET_ASSERT_LOCKED(). Perhaps
> we should go for it before we will be further playing with parallel
> softnet.

I'd love is somebody could do the changes in the rwlock API to let us
check if we're holding a lock.  Maybe this is already present in
witness(4), visa?

Reply | Threaded
Open this post in threaded view
|

Re: un-KERNEL_LOCK() TCP/UDP input & co

Alexandr Nedvedicky
Hello,
</snip>
> >     So I'd like to know, what's the plan for NET_ASSERT_LOCKED() macro?
> >
> > a) are we going to relax it, so it will test if the net_lock is
> > locked?
>
> Yep, that's already done.

    cool, thanks a lot. I've just noticed the change is there while ago, while
    reading another thread here [1] (assertion "_kernel_lock_held()" failed,
    uipc_socket2.c: ipsec)


thanks and
regards
sasha

[1] https://marc.info/?l=openbsd-tech&m=151052226224227&w=2 

Reply | Threaded
Open this post in threaded view
|

Re: un-KERNEL_LOCK() TCP/UDP input & co

Visa Hankala-2
In reply to this post by Martin Pieuchot
On Mon, Nov 13, 2017 at 01:25:42PM +0100, Martin Pieuchot wrote:
> I'd love is somebody could do the changes in the rwlock API to let us
> check if we're holding a lock.  Maybe this is already present in
> witness(4), visa?

witness(4) does maintain data about lock ownership. The patch below
might do the trick for rwlocks without adding extra state tracking.

The patch also fixes the witness-side initialization of statically
initialized locks, in functions witness_checkorder() and
witness_lock(). The fix belongs to a separate commit though.

Index: share/man/man9/rwlock.9
===================================================================
RCS file: src/share/man/man9/rwlock.9,v
retrieving revision 1.20
diff -u -p -r1.20 rwlock.9
--- share/man/man9/rwlock.9 30 Oct 2017 13:33:36 -0000 1.20
+++ share/man/man9/rwlock.9 13 Nov 2017 17:05:51 -0000
@@ -33,6 +33,7 @@
 .Nm rw_assert_anylock ,
 .Nm rw_assert_unlocked ,
 .Nm rw_status ,
+.Nm rw_status_curproc ,
 .Nm RWLOCK_INITIALIZER ,
 .Nm rrw_init ,
 .Nm rrw_init_flags ,
@@ -68,6 +69,8 @@
 .Fn rw_assert_unlocked "struct rwlock *rwl"
 .Ft int
 .Fn rw_status "struct rwlock *rwl"
+.Ft int
+.Fn rw_status_curproc "struct rwlock *rwl"
 .Fn RWLOCK_INITIALIZER "const char *name"
 .Ft void
 .Fn rrw_init "struct rrwlock *rrwl" "const char *name"
@@ -181,7 +184,7 @@ functions check the status
 .Fa rwl ,
 panicking if it is not write-, read-, any-, or unlocked, respectively.
 .Pp
-.Nm rw_status
+.Fn rw_status
 returns the current state of the lock:
 .Pp
 .Bl -tag -width "RW_WRITE_OTHER" -offset indent -compact
@@ -194,6 +197,22 @@ Lock is read locked.
 The current thread may be one of the threads that has it locked.
 .It 0
 Lock is not locked.
+.El
+.Pp
+.Fn rw_status_curproc
+returns the current state of the lock relative to the calling thread:
+.Pp
+.Bl -tag -width "RW_WRITE_OTHER" -offset indent -compact
+.It Dv RW_WRITE
+Lock is write locked by the calling thread.
+.It Dv RW_READ
+Lock is read locked.
+The current thread may be one of the threads that has it locked.
+If the kernel has been compiled with
+.Xr witness 4 ,
+the status is exact, and the lock is read locked by the current thread.
+.It 0
+Lock is not locked by the calling thread.
 .El
 .Pp
 A lock declaration may be initialised with the
Index: sys/kern/kern_rwlock.c
===================================================================
RCS file: src/sys/kern/kern_rwlock.c,v
retrieving revision 1.32
diff -u -p -r1.32 kern_rwlock.c
--- sys/kern/kern_rwlock.c 24 Oct 2017 08:53:15 -0000 1.32
+++ sys/kern/kern_rwlock.c 13 Nov 2017 17:05:51 -0000
@@ -325,6 +325,34 @@ rw_status(struct rwlock *rwl)
  return (0);
 }
 
+int
+rw_status_curproc(struct rwlock *rwl)
+{
+ unsigned long owner;
+#ifdef WITNESS
+ int status = witness_status(&rwl->rwl_lock_obj);
+
+ if (status != -1) {
+ if (status & LA_XLOCKED)
+ return (RW_WRITE);
+ if (status & LA_SLOCKED)
+ return (RW_READ);
+ return (0);
+ }
+#endif
+
+ owner = rwl->rwl_owner;
+ if (owner & RWLOCK_WRLOCK) {
+ if (RW_PROC(curproc) == RW_PROC(owner))
+ return (RW_WRITE);
+ else
+ return (0);
+ }
+ if (owner)
+ return RW_READ;
+ return (0);
+}
+
 #ifdef DIAGNOSTIC
 void
 rw_assert_wrlock(struct rwlock *rwl)
Index: sys/kern/subr_witness.c
===================================================================
RCS file: src/sys/kern/subr_witness.c,v
retrieving revision 1.4
diff -u -p -r1.4 subr_witness.c
--- sys/kern/subr_witness.c 12 Aug 2017 03:13:23 -0000 1.4
+++ sys/kern/subr_witness.c 13 Nov 2017 17:05:51 -0000
@@ -744,8 +744,8 @@ witness_checkorder(struct lock_object *l
  struct witness *w, *w1;
  int i, j, s;
 
- if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL ||
-    panicstr != NULL)
+ if (witness_cold || witness_watch < 1 || panicstr != NULL ||
+    (lock->lo_flags & LO_WITNESS) == 0)
  return;
 
  w = lock->lo_witness;
@@ -1078,8 +1078,8 @@ witness_lock(struct lock_object *lock, i
  struct witness *w;
  int s;
 
- if (witness_cold || witness_watch == -1 || lock->lo_witness == NULL ||
-    panicstr != NULL)
+ if (witness_cold || witness_watch == -1 || panicstr != NULL ||
+    (lock->lo_flags & LO_WITNESS) == 0)
  return;
 
  w = lock->lo_witness;
@@ -2004,6 +2004,42 @@ witness_assert(const struct lock_object
 
  }
 #endif /* INVARIANT_SUPPORT */
+}
+
+int
+witness_status(struct lock_object *lock)
+{
+ struct lock_instance *instance;
+ struct lock_class *class;
+ int status = 0;
+
+ if (witness_cold || witness_watch < 1 || panicstr != NULL ||
+    (lock->lo_flags & LO_WITNESS) == 0)
+ return -1;
+
+ class = LOCK_CLASS(lock);
+ if ((class->lc_flags & LC_SLEEPLOCK) != 0)
+ instance = find_instance(curproc->p_sleeplocks, lock);
+ else if ((class->lc_flags & LC_SPINLOCK) != 0)
+ instance = find_instance(
+    witness_cpu[cpu_number()].wc_spinlocks, lock);
+ else
+ panic("lock (%s) %s is not sleep or spin!",
+    class->lc_name, lock->lo_name);
+
+ if (instance != NULL) {
+ status |= LA_LOCKED;
+ if (instance->li_flags & LI_EXCLUSIVE)
+ status |= LA_XLOCKED;
+ else
+ status |= LA_SLOCKED;
+ if (instance->li_flags & LI_RECURSEMASK)
+ status |= LA_RECURSED;
+ else
+ status |= LA_NOTRECURSED;
+ }
+
+ return (status);
 }
 
 static void
Index: sys/sys/rwlock.h
===================================================================
RCS file: src/sys/sys/rwlock.h,v
retrieving revision 1.22
diff -u -p -r1.22 rwlock.h
--- sys/sys/rwlock.h 12 Aug 2017 23:27:44 -0000 1.22
+++ sys/sys/rwlock.h 13 Nov 2017 17:05:51 -0000
@@ -170,6 +170,7 @@ void rw_assert_unlocked(struct rwlock *)
 int _rw_enter(struct rwlock *, int LOCK_FL_VARS);
 void _rw_exit(struct rwlock * LOCK_FL_VARS);
 int rw_status(struct rwlock *);
+int rw_status_curproc(struct rwlock *);
 
 #define rw_enter(rwl, flags) _rw_enter(rwl, flags LOCK_FILE_LINE)
 #define rw_exit(rwl) _rw_exit(rwl LOCK_FILE_LINE)
Index: sys/sys/systm.h
===================================================================
RCS file: src/sys/sys/systm.h,v
retrieving revision 1.135
diff -u -p -r1.135 systm.h
--- sys/sys/systm.h 13 Nov 2017 14:41:46 -0000 1.135
+++ sys/sys/systm.h 13 Nov 2017 17:05:52 -0000
@@ -323,7 +323,7 @@ do { \
 
 #define NET_ASSERT_LOCKED() \
 do { \
- int _s = rw_status(&netlock); \
+ int _s = rw_status_curproc(&netlock); \
  if ((splassert_ctl > 0) && (_s != RW_WRITE && _s != RW_READ)) \
  splassert_fail(RW_READ, _s, __func__); \
 } while (0)
Index: sys/sys/witness.h
===================================================================
RCS file: src/sys/sys/witness.h,v
retrieving revision 1.1
diff -u -p -r1.1 witness.h
--- sys/sys/witness.h 20 Apr 2017 12:59:36 -0000 1.1
+++ sys/sys/witness.h 13 Nov 2017 17:05:52 -0000
@@ -88,6 +88,7 @@ int witness_line(struct lock_object *);
 void witness_norelease(struct lock_object *);
 void witness_releaseok(struct lock_object *);
 const char *witness_file(struct lock_object *);
+int witness_status(struct lock_object *);
 void witness_thread_exit(struct proc *);
 
 #ifdef WITNESS

Reply | Threaded
Open this post in threaded view
|

Re: un-KERNEL_LOCK() TCP/UDP input & co

Martin Pieuchot
On 13/11/17(Mon) 17:29, Visa Hankala wrote:
> On Mon, Nov 13, 2017 at 01:25:42PM +0100, Martin Pieuchot wrote:
> > I'd love is somebody could do the changes in the rwlock API to let us
> > check if we're holding a lock.  Maybe this is already present in
> > witness(4), visa?
>
> witness(4) does maintain data about lock ownership. The patch below
> might do the trick for rwlocks without adding extra state tracking.

I wouldn't mind a solution that's always on.  I'm afraid it will take
some time before we can enable WITNESS regularly.

> The patch also fixes the witness-side initialization of statically
> initialized locks, in functions witness_checkorder() and
> witness_lock(). The fix belongs to a separate commit though.

You should commit that.

Note that the diff below still includes a false positive.  If the
current thread doesn't have the read-lock but another one has it,
then RW_READ will still be returned.

> Index: share/man/man9/rwlock.9
> ===================================================================
> RCS file: src/share/man/man9/rwlock.9,v
> retrieving revision 1.20
> diff -u -p -r1.20 rwlock.9
> --- share/man/man9/rwlock.9 30 Oct 2017 13:33:36 -0000 1.20
> +++ share/man/man9/rwlock.9 13 Nov 2017 17:05:51 -0000
> @@ -33,6 +33,7 @@
>  .Nm rw_assert_anylock ,
>  .Nm rw_assert_unlocked ,
>  .Nm rw_status ,
> +.Nm rw_status_curproc ,
>  .Nm RWLOCK_INITIALIZER ,
>  .Nm rrw_init ,
>  .Nm rrw_init_flags ,
> @@ -68,6 +69,8 @@
>  .Fn rw_assert_unlocked "struct rwlock *rwl"
>  .Ft int
>  .Fn rw_status "struct rwlock *rwl"
> +.Ft int
> +.Fn rw_status_curproc "struct rwlock *rwl"
>  .Fn RWLOCK_INITIALIZER "const char *name"
>  .Ft void
>  .Fn rrw_init "struct rrwlock *rrwl" "const char *name"
> @@ -181,7 +184,7 @@ functions check the status
>  .Fa rwl ,
>  panicking if it is not write-, read-, any-, or unlocked, respectively.
>  .Pp
> -.Nm rw_status
> +.Fn rw_status
>  returns the current state of the lock:
>  .Pp
>  .Bl -tag -width "RW_WRITE_OTHER" -offset indent -compact
> @@ -194,6 +197,22 @@ Lock is read locked.
>  The current thread may be one of the threads that has it locked.
>  .It 0
>  Lock is not locked.
> +.El
> +.Pp
> +.Fn rw_status_curproc
> +returns the current state of the lock relative to the calling thread:
> +.Pp
> +.Bl -tag -width "RW_WRITE_OTHER" -offset indent -compact
> +.It Dv RW_WRITE
> +Lock is write locked by the calling thread.
> +.It Dv RW_READ
> +Lock is read locked.
> +The current thread may be one of the threads that has it locked.
> +If the kernel has been compiled with
> +.Xr witness 4 ,
> +the status is exact, and the lock is read locked by the current thread.
> +.It 0
> +Lock is not locked by the calling thread.
>  .El
>  .Pp
>  A lock declaration may be initialised with the
> Index: sys/kern/kern_rwlock.c
> ===================================================================
> RCS file: src/sys/kern/kern_rwlock.c,v
> retrieving revision 1.32
> diff -u -p -r1.32 kern_rwlock.c
> --- sys/kern/kern_rwlock.c 24 Oct 2017 08:53:15 -0000 1.32
> +++ sys/kern/kern_rwlock.c 13 Nov 2017 17:05:51 -0000
> @@ -325,6 +325,34 @@ rw_status(struct rwlock *rwl)
>   return (0);
>  }
>  
> +int
> +rw_status_curproc(struct rwlock *rwl)
> +{
> + unsigned long owner;
> +#ifdef WITNESS
> + int status = witness_status(&rwl->rwl_lock_obj);
> +
> + if (status != -1) {
> + if (status & LA_XLOCKED)
> + return (RW_WRITE);
> + if (status & LA_SLOCKED)
> + return (RW_READ);
> + return (0);
> + }
> +#endif
> +
> + owner = rwl->rwl_owner;
> + if (owner & RWLOCK_WRLOCK) {
> + if (RW_PROC(curproc) == RW_PROC(owner))
> + return (RW_WRITE);
> + else
> + return (0);
> + }
> + if (owner)
> + return RW_READ;
> + return (0);
> +}
> +
>  #ifdef DIAGNOSTIC
>  void
>  rw_assert_wrlock(struct rwlock *rwl)
> Index: sys/kern/subr_witness.c
> ===================================================================
> RCS file: src/sys/kern/subr_witness.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 subr_witness.c
> --- sys/kern/subr_witness.c 12 Aug 2017 03:13:23 -0000 1.4
> +++ sys/kern/subr_witness.c 13 Nov 2017 17:05:51 -0000
> @@ -744,8 +744,8 @@ witness_checkorder(struct lock_object *l
>   struct witness *w, *w1;
>   int i, j, s;
>  
> - if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL ||
> -    panicstr != NULL)
> + if (witness_cold || witness_watch < 1 || panicstr != NULL ||
> +    (lock->lo_flags & LO_WITNESS) == 0)
>   return;
>  
>   w = lock->lo_witness;
> @@ -1078,8 +1078,8 @@ witness_lock(struct lock_object *lock, i
>   struct witness *w;
>   int s;
>  
> - if (witness_cold || witness_watch == -1 || lock->lo_witness == NULL ||
> -    panicstr != NULL)
> + if (witness_cold || witness_watch == -1 || panicstr != NULL ||
> +    (lock->lo_flags & LO_WITNESS) == 0)
>   return;
>  
>   w = lock->lo_witness;
> @@ -2004,6 +2004,42 @@ witness_assert(const struct lock_object
>  
>   }
>  #endif /* INVARIANT_SUPPORT */
> +}
> +
> +int
> +witness_status(struct lock_object *lock)
> +{
> + struct lock_instance *instance;
> + struct lock_class *class;
> + int status = 0;
> +
> + if (witness_cold || witness_watch < 1 || panicstr != NULL ||
> +    (lock->lo_flags & LO_WITNESS) == 0)
> + return -1;
> +
> + class = LOCK_CLASS(lock);
> + if ((class->lc_flags & LC_SLEEPLOCK) != 0)
> + instance = find_instance(curproc->p_sleeplocks, lock);
> + else if ((class->lc_flags & LC_SPINLOCK) != 0)
> + instance = find_instance(
> +    witness_cpu[cpu_number()].wc_spinlocks, lock);
> + else
> + panic("lock (%s) %s is not sleep or spin!",
> +    class->lc_name, lock->lo_name);
> +
> + if (instance != NULL) {
> + status |= LA_LOCKED;
> + if (instance->li_flags & LI_EXCLUSIVE)
> + status |= LA_XLOCKED;
> + else
> + status |= LA_SLOCKED;
> + if (instance->li_flags & LI_RECURSEMASK)
> + status |= LA_RECURSED;
> + else
> + status |= LA_NOTRECURSED;
> + }
> +
> + return (status);
>  }
>  
>  static void
> Index: sys/sys/rwlock.h
> ===================================================================
> RCS file: src/sys/sys/rwlock.h,v
> retrieving revision 1.22
> diff -u -p -r1.22 rwlock.h
> --- sys/sys/rwlock.h 12 Aug 2017 23:27:44 -0000 1.22
> +++ sys/sys/rwlock.h 13 Nov 2017 17:05:51 -0000
> @@ -170,6 +170,7 @@ void rw_assert_unlocked(struct rwlock *)
>  int _rw_enter(struct rwlock *, int LOCK_FL_VARS);
>  void _rw_exit(struct rwlock * LOCK_FL_VARS);
>  int rw_status(struct rwlock *);
> +int rw_status_curproc(struct rwlock *);
>  
>  #define rw_enter(rwl, flags) _rw_enter(rwl, flags LOCK_FILE_LINE)
>  #define rw_exit(rwl) _rw_exit(rwl LOCK_FILE_LINE)
> Index: sys/sys/systm.h
> ===================================================================
> RCS file: src/sys/sys/systm.h,v
> retrieving revision 1.135
> diff -u -p -r1.135 systm.h
> --- sys/sys/systm.h 13 Nov 2017 14:41:46 -0000 1.135
> +++ sys/sys/systm.h 13 Nov 2017 17:05:52 -0000
> @@ -323,7 +323,7 @@ do { \
>  
>  #define NET_ASSERT_LOCKED() \
>  do { \
> - int _s = rw_status(&netlock); \
> + int _s = rw_status_curproc(&netlock); \
>   if ((splassert_ctl > 0) && (_s != RW_WRITE && _s != RW_READ)) \
>   splassert_fail(RW_READ, _s, __func__); \
>  } while (0)
> Index: sys/sys/witness.h
> ===================================================================
> RCS file: src/sys/sys/witness.h,v
> retrieving revision 1.1
> diff -u -p -r1.1 witness.h
> --- sys/sys/witness.h 20 Apr 2017 12:59:36 -0000 1.1
> +++ sys/sys/witness.h 13 Nov 2017 17:05:52 -0000
> @@ -88,6 +88,7 @@ int witness_line(struct lock_object *);
>  void witness_norelease(struct lock_object *);
>  void witness_releaseok(struct lock_object *);
>  const char *witness_file(struct lock_object *);
> +int witness_status(struct lock_object *);
>  void witness_thread_exit(struct proc *);
>  
>  #ifdef WITNESS
>

Reply | Threaded
Open this post in threaded view
|

Re: un-KERNEL_LOCK() TCP/UDP input & co

Visa Hankala-2
On Tue, Nov 14, 2017 at 10:37:16AM +0100, Martin Pieuchot wrote:
> Note that the diff below still includes a false positive.  If the
> current thread doesn't have the read-lock but another one has it,
> then RW_READ will still be returned.

That's true if witness(4) has not been enabled.