try to bundle multiple packets on an ifq before sending

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
8 messages Options
Reply | Threaded
Open this post in threaded view
|

try to bundle multiple packets on an ifq before sending

David Gwynne-5
this makes ifq_start try to wait for 4 packets before calling
if->if_qstart.

this is based on work sephe did in dragonflybsd, and described in
a comment in their sys/net/if.c. there's a link to it here:
https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/sys/net/if.c#L2976-L2987

the tests we've done generally show a performance bump, but otherwise
no degradation in performance.

the mechanism for bundling packets is to simply schedule a task to
service the queue later. if 4 packets get accumulated before the
task runs, it's cancelled and the code runs the start routine
directly.

the most significant difference this implementation has to the dfly
one is that our ifqs dont (currently) track the number of bytes on
the q. dfly sends if it can bundle 4 packets, or up to mtu bytes
worth of packets. this implementation only looks at the number of
packets.

the taskq the ifq uses is one of the softnets, which is assigned
when the ifq is initted and unconditionally used during the ifq's
lifetime. because ifq work could now be pending in a softnet taskq,
ifq_barrier also needs to put a barrier in the taskq. this is
implemented using taskq_barrier, which i wrote ages ago but didn't
have a use case for at the time.

tests? ok?

Index: share/man/man9/task_add.9
===================================================================
RCS file: /cvs/src/share/man/man9/task_add.9,v
retrieving revision 1.16
diff -u -p -r1.16 task_add.9
--- share/man/man9/task_add.9 14 Sep 2015 15:14:55 -0000 1.16
+++ share/man/man9/task_add.9 10 Nov 2017 00:45:41 -0000
@@ -20,6 +20,7 @@
 .Sh NAME
 .Nm taskq_create ,
 .Nm taskq_destroy ,
+.Nm taskq_barrier ,
 .Nm task_set ,
 .Nm task_add ,
 .Nm task_del ,
@@ -37,6 +38,8 @@
 .Ft void
 .Fn taskq_destroy "struct taskq *tq"
 .Ft void
+.Fn taskq_barrier "struct taskq *tq"
+.Ft void
 .Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg"
 .Ft int
 .Fn task_add "struct taskq *tq" "struct task *t"
@@ -88,6 +91,15 @@ Calling
 against the system taskq is an error and will lead to undefined
 behaviour or a system fault.
 .Pp
+.Fn taskq_barrier
+guarantees that any task that was running on the
+.Fa tq
+taskq when the barrier was called has finished by the time the barrier
+returns.
+.Fn taskq_barrier
+is only supported on taskqs serviced by 1 thread,
+and may not be called by a task running in the specified taskq.
+.Pp
 It is the responsibility of the caller to provide the
 .Fn task_set ,
 .Fn task_add ,
@@ -163,6 +175,8 @@ argument given in
 and
 .Fn taskq_destroy
 can be called during autoconf, or from process context.
+.Fn taskq_barrier
+can be called from process context.
 .Fn task_set ,
 .Fn task_add ,
 and
Index: sys/sys/task.h
===================================================================
RCS file: /cvs/src/sys/sys/task.h,v
retrieving revision 1.11
diff -u -p -r1.11 task.h
--- sys/sys/task.h 7 Jun 2016 07:53:33 -0000 1.11
+++ sys/sys/task.h 10 Nov 2017 00:45:41 -0000
@@ -43,6 +43,7 @@ extern struct taskq *const systqmp;
 
 struct taskq *taskq_create(const char *, unsigned int, int, unsigned int);
 void taskq_destroy(struct taskq *);
+void taskq_barrier(struct taskq *);
 
 void task_set(struct task *, void (*)(void *), void *);
 int task_add(struct taskq *, struct task *);
Index: sys/kern/kern_task.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_task.c,v
retrieving revision 1.20
diff -u -p -r1.20 kern_task.c
--- sys/kern/kern_task.c 30 Oct 2017 14:01:42 -0000 1.20
+++ sys/kern/kern_task.c 10 Nov 2017 00:45:41 -0000
@@ -22,6 +22,7 @@
 #include <sys/mutex.h>
 #include <sys/kthread.h>
 #include <sys/task.h>
+#include <sys/proc.h>
 
 #define TASK_ONQUEUE 1
 
@@ -68,6 +69,7 @@ struct taskq *const systqmp = &taskq_sys
 
 void taskq_init(void); /* called in init_main.c */
 void taskq_create_thread(void *);
+void taskq_barrier_task(void *);
 int taskq_sleep(const volatile void *, struct mutex *, int,
     const char *, int);
 int taskq_next_work(struct taskq *, struct task *, sleepfn);
@@ -176,6 +178,30 @@ taskq_create_thread(void *arg)
  } while (tq->tq_running < tq->tq_nthreads);
 
  mtx_leave(&tq->tq_mtx);
+}
+
+void
+taskq_barrier(struct taskq *tq)
+{
+ struct sleep_state sls;
+ unsigned int notdone = 1;
+ struct task t = TASK_INITIALIZER(taskq_barrier_task, &notdone);
+
+ task_add(tq, &t);
+
+ while (notdone) {
+ sleep_setup(&sls, &notdone, PWAIT, "tqbar");
+ sleep_finish(&sls, notdone);
+ }
+}
+
+void
+taskq_barrier_task(void *p)
+{
+ unsigned int *notdone = p;
+
+ *notdone = 0;
+ wakeup_one(notdone);
 }
 
 void
Index: sys/net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
retrieving revision 1.12
diff -u -p -r1.12 ifq.c
--- sys/net/ifq.c 2 Jun 2017 00:07:12 -0000 1.12
+++ sys/net/ifq.c 10 Nov 2017 00:45:41 -0000
@@ -64,9 +64,16 @@ struct priq {
 void ifq_start_task(void *);
 void ifq_restart_task(void *);
 void ifq_barrier_task(void *);
+void ifq_bundle_task(void *);
 
 #define TASK_ONQUEUE 0x1
 
+static inline void
+ifq_run_start(struct ifqueue *ifq)
+{
+ ifq_serialize(ifq, &ifq->ifq_start);
+}
+
 void
 ifq_serialize(struct ifqueue *ifq, struct task *t)
 {
@@ -108,6 +115,16 @@ ifq_is_serialized(struct ifqueue *ifq)
 }
 
 void
+ifq_start(struct ifqueue *ifq)
+{
+ if (ifq_len(ifq) >= 4) {
+ task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
+ ifq_run_start(ifq);
+ } else
+ task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
+}
+
+void
 ifq_start_task(void *p)
 {
  struct ifqueue *ifq = p;
@@ -131,6 +148,14 @@ ifq_restart_task(void *p)
 }
 
 void
+ifq_bundle_task(void *p)
+{
+ struct ifqueue *ifq = p;
+
+ ifq_run_start(ifq);
+}
+
+void
 ifq_barrier(struct ifqueue *ifq)
 {
  struct sleep_state sls;
@@ -140,6 +165,9 @@ ifq_barrier(struct ifqueue *ifq)
  /* this should only be called from converted drivers */
  KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));
 
+ if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
+ taskq_barrier(ifq->ifq_softnet);
+
  if (ifq->ifq_serializer == NULL)
  return;
 
@@ -168,6 +196,7 @@ void
 ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
 {
  ifq->ifq_if = ifp;
+ ifq->ifq_softnet = net_tq(ifp->if_index + idx);
  ifq->ifq_softc = NULL;
 
  mtx_init(&ifq->ifq_mtx, IPL_NET);
@@ -189,6 +218,7 @@ ifq_init(struct ifqueue *ifq, struct ifn
  mtx_init(&ifq->ifq_task_mtx, IPL_NET);
  TAILQ_INIT(&ifq->ifq_task_list);
  ifq->ifq_serializer = NULL;
+ task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);
 
  task_set(&ifq->ifq_start, ifq_start_task, ifq);
  task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
@@ -239,6 +269,8 @@ void
 ifq_destroy(struct ifqueue *ifq)
 {
  struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+
+ ifq_barrier(ifq); /* ensure nothing is running with the ifq */
 
  /* don't need to lock because this is the last use of the ifq */
 
Index: sys/net/ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
retrieving revision 1.13
diff -u -p -r1.13 ifq.h
--- sys/net/ifq.h 3 May 2017 20:55:29 -0000 1.13
+++ sys/net/ifq.h 10 Nov 2017 00:45:41 -0000
@@ -25,6 +25,7 @@ struct ifq_ops;
 
 struct ifqueue {
  struct ifnet *ifq_if;
+ struct taskq *ifq_softnet;
  union {
  void *_ifq_softc;
  /*
@@ -57,6 +58,7 @@ struct ifqueue {
  struct mutex ifq_task_mtx;
  struct task_list ifq_task_list;
  void *ifq_serializer;
+ struct task ifq_bundle;
 
  /* work to be serialised */
  struct task ifq_start;
@@ -378,6 +380,7 @@ void ifq_init(struct ifqueue *, struct
 void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
 void ifq_destroy(struct ifqueue *);
 int ifq_enqueue(struct ifqueue *, struct mbuf *);
+void ifq_start(struct ifqueue *);
 struct mbuf *ifq_deq_begin(struct ifqueue *);
 void ifq_deq_commit(struct ifqueue *, struct mbuf *);
 void ifq_deq_rollback(struct ifqueue *, struct mbuf *);
@@ -411,12 +414,6 @@ static inline unsigned int
 ifq_is_oactive(struct ifqueue *ifq)
 {
  return (ifq->ifq_oactive);
-}
-
-static inline void
-ifq_start(struct ifqueue *ifq)
-{
- ifq_serialize(ifq, &ifq->ifq_start);
 }
 
 static inline void

Reply | Threaded
Open this post in threaded view
|

Re: try to bundle multiple packets on an ifq before sending

Hrvoje Popovski
On 10.11.2017. 1:58, David Gwynne wrote:

> this makes ifq_start try to wait for 4 packets before calling
> if->if_qstart.
>
> this is based on work sephe did in dragonflybsd, and described in
> a comment in their sys/net/if.c. there's a link to it here:
> https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/sys/net/if.c#L2976-L2987
>
> the tests we've done generally show a performance bump, but otherwise
> no degradation in performance.
>
> the mechanism for bundling packets is to but schedule a task to
> service the queue later. if 4 packets get accumulated before the
> task runs, it's cancelled and the code runs the start routine
> directly.
>
> the most significant difference this implementation has to the dfly
> one is that our ifqs dont (currently) track the number of bytes on
> the q. dfly sends if it can bundle 4 packets, or up to mtu bytes
> worth of packets. this implementation only looks at the number of
> packets.
>
> the taskq the ifq uses is one of the softnets, which is assigned
> when the ifq is initted and unconditionally used during the ifq's
> lifetime. because ifq work could now be pending in a softnet taskq,
> ifq_barrier also needs to put a barrier in the taskq. this is
> implemented using taskq_barrier, which i wrote ages ago but didn't
> have a use case for at the time.
>
> tests? ok?

Hi all,

i've tested plain ip4 forwarding performance with and without this diff
and here are results.
12 x Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz, 2400.35 MHz

ix (82599) - sending 14Mpps
plain forwarding - 1.18Mpps
patched forwarding - 1.31Mpps

myx (8BL2-2S) - sending 14Mpps
plain forwarding - 900Kpps
patched forwarding - 1.10Mpps

em (i350) - sending 1.4Mpps
plain forwarding - 1Mpps
patched forwarding - 1.17Mpps

bge (5720) - sending 1.4Mpps
plain forwarding - oscillate from 780Kpps to 940Kpps
patched forwarding - oscillate from 850Kpps to 1Mpps


on box with
6 x Intel(R) Xeon(R) CPU E5-2643 v2 @ 3.50GHz, 3600.55 MHz
and with ix (82599) forwarding performance are the same with or without
diff, which is 1.75Mpps


will play with this diff over weekend just to see if there will be some
ill effect


Reply | Threaded
Open this post in threaded view
|

Re: try to bundle multiple packets on an ifq before sending

Claudio Jeker
In reply to this post by David Gwynne-5
On Fri, Nov 10, 2017 at 10:58:54AM +1000, David Gwynne wrote:

> this makes ifq_start try to wait for 4 packets before calling
> if->if_qstart.
>
> this is based on work sephe did in dragonflybsd, and described in
> a comment in their sys/net/if.c. there's a link to it here:
> https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/sys/net/if.c#L2976-L2987
>
> the tests we've done generally show a performance bump, but otherwise
> no degradation in performance.
>
> the mechanism for bundling packets is to but schedule a task to
> service the queue later. if 4 packets get accumulated before the
> task runs, it's cancelled and the code runs the start routine
> directly.
>
> the most significant difference this implementation has to the dfly
> one is that our ifqs dont (currently) track the number of bytes on
> the q. dfly sends if it can bundle 4 packets, or up to mtu bytes
> worth of packets. this implementation only looks at the number of
> packets.
>
> the taskq the ifq uses is one of the softnets, which is assigned
> when the ifq is initted and unconditionally used during the ifq's
> lifetime. because ifq work could now be pending in a softnet taskq,
> ifq_barrier also needs to put a barrier in the taskq. this is
> implemented using taskq_barrier, which i wrote ages ago but didn't
> have a use case for at the time.
>
> tests? ok?

I wonder what the effect on the latency is. Especially the case when using
a management interface on an otherwise busy router. If I understand it correctly
the delay should not change but I'm unsure.

 

> Index: share/man/man9/task_add.9
> ===================================================================
> RCS file: /cvs/src/share/man/man9/task_add.9,v
> retrieving revision 1.16
> diff -u -p -r1.16 task_add.9
> --- share/man/man9/task_add.9 14 Sep 2015 15:14:55 -0000 1.16
> +++ share/man/man9/task_add.9 10 Nov 2017 00:45:41 -0000
> @@ -20,6 +20,7 @@
>  .Sh NAME
>  .Nm taskq_create ,
>  .Nm taskq_destroy ,
> +.Nm taskq_barrier ,
>  .Nm task_set ,
>  .Nm task_add ,
>  .Nm task_del ,
> @@ -37,6 +38,8 @@
>  .Ft void
>  .Fn taskq_destroy "struct taskq *tq"
>  .Ft void
> +.Fn taskq_barrier "struct taskq *tq"
> +.Ft void
>  .Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg"
>  .Ft int
>  .Fn task_add "struct taskq *tq" "struct task *t"
> @@ -88,6 +91,15 @@ Calling
>  against the system taskq is an error and will lead to undefined
>  behaviour or a system fault.
>  .Pp
> +.Fn taskq_barrier
> +guarantees that any task that was running on the
> +.Fa tq
> +taskq when the barrier was called has finished by the time the barrier
> +returns.
> +.Fn taskq_barrier
> +is only supported on taskqs serviced by 1 thread,
> +and may not be called by a task running in the specified taskq.
> +.Pp
>  It is the responsibility of the caller to provide the
>  .Fn task_set ,
>  .Fn task_add ,
> @@ -163,6 +175,8 @@ argument given in
>  and
>  .Fn taskq_destroy
>  can be called during autoconf, or from process context.
> +.Fn taskq_barrier
> +can be called from process context.
>  .Fn task_set ,
>  .Fn task_add ,
>  and
> Index: sys/sys/task.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/task.h,v
> retrieving revision 1.11
> diff -u -p -r1.11 task.h
> --- sys/sys/task.h 7 Jun 2016 07:53:33 -0000 1.11
> +++ sys/sys/task.h 10 Nov 2017 00:45:41 -0000
> @@ -43,6 +43,7 @@ extern struct taskq *const systqmp;
>  
>  struct taskq *taskq_create(const char *, unsigned int, int, unsigned int);
>  void taskq_destroy(struct taskq *);
> +void taskq_barrier(struct taskq *);
>  
>  void task_set(struct task *, void (*)(void *), void *);
>  int task_add(struct taskq *, struct task *);
> Index: sys/kern/kern_task.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_task.c,v
> retrieving revision 1.20
> diff -u -p -r1.20 kern_task.c
> --- sys/kern/kern_task.c 30 Oct 2017 14:01:42 -0000 1.20
> +++ sys/kern/kern_task.c 10 Nov 2017 00:45:41 -0000
> @@ -22,6 +22,7 @@
>  #include <sys/mutex.h>
>  #include <sys/kthread.h>
>  #include <sys/task.h>
> +#include <sys/proc.h>
>  
>  #define TASK_ONQUEUE 1
>  
> @@ -68,6 +69,7 @@ struct taskq *const systqmp = &taskq_sys
>  
>  void taskq_init(void); /* called in init_main.c */
>  void taskq_create_thread(void *);
> +void taskq_barrier_task(void *);
>  int taskq_sleep(const volatile void *, struct mutex *, int,
>      const char *, int);
>  int taskq_next_work(struct taskq *, struct task *, sleepfn);
> @@ -176,6 +178,30 @@ taskq_create_thread(void *arg)
>   } while (tq->tq_running < tq->tq_nthreads);
>  
>   mtx_leave(&tq->tq_mtx);
> +}
> +
> +void
> +taskq_barrier(struct taskq *tq)
> +{
> + struct sleep_state sls;
> + unsigned int notdone = 1;
> + struct task t = TASK_INITIALIZER(taskq_barrier_task, &notdone);
> +
> + task_add(tq, &t);
> +
> + while (notdone) {
> + sleep_setup(&sls, &notdone, PWAIT, "tqbar");
> + sleep_finish(&sls, notdone);
> + }
> +}
> +
> +void
> +taskq_barrier_task(void *p)
> +{
> + unsigned int *notdone = p;
> +
> + *notdone = 0;
> + wakeup_one(notdone);
>  }
>  
>  void
> Index: sys/net/ifq.c
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.c,v
> retrieving revision 1.12
> diff -u -p -r1.12 ifq.c
> --- sys/net/ifq.c 2 Jun 2017 00:07:12 -0000 1.12
> +++ sys/net/ifq.c 10 Nov 2017 00:45:41 -0000
> @@ -64,9 +64,16 @@ struct priq {
>  void ifq_start_task(void *);
>  void ifq_restart_task(void *);
>  void ifq_barrier_task(void *);
> +void ifq_bundle_task(void *);
>  
>  #define TASK_ONQUEUE 0x1
>  
> +static inline void
> +ifq_run_start(struct ifqueue *ifq)
> +{
> + ifq_serialize(ifq, &ifq->ifq_start);
> +}
> +
>  void
>  ifq_serialize(struct ifqueue *ifq, struct task *t)
>  {
> @@ -108,6 +115,16 @@ ifq_is_serialized(struct ifqueue *ifq)
>  }
>  
>  void
> +ifq_start(struct ifqueue *ifq)
> +{
> + if (ifq_len(ifq) >= 4) {
> + task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
> + ifq_run_start(ifq);
> + } else
> + task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
> +}
> +
> +void
>  ifq_start_task(void *p)
>  {
>   struct ifqueue *ifq = p;
> @@ -131,6 +148,14 @@ ifq_restart_task(void *p)
>  }
>  
>  void
> +ifq_bundle_task(void *p)
> +{
> + struct ifqueue *ifq = p;
> +
> + ifq_run_start(ifq);
> +}
> +
> +void
>  ifq_barrier(struct ifqueue *ifq)
>  {
>   struct sleep_state sls;
> @@ -140,6 +165,9 @@ ifq_barrier(struct ifqueue *ifq)
>   /* this should only be called from converted drivers */
>   KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));
>  
> + if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
> + taskq_barrier(ifq->ifq_softnet);
> +
>   if (ifq->ifq_serializer == NULL)
>   return;
>  
> @@ -168,6 +196,7 @@ void
>  ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
>  {
>   ifq->ifq_if = ifp;
> + ifq->ifq_softnet = net_tq(ifp->if_index + idx);
>   ifq->ifq_softc = NULL;
>  
>   mtx_init(&ifq->ifq_mtx, IPL_NET);
> @@ -189,6 +218,7 @@ ifq_init(struct ifqueue *ifq, struct ifn
>   mtx_init(&ifq->ifq_task_mtx, IPL_NET);
>   TAILQ_INIT(&ifq->ifq_task_list);
>   ifq->ifq_serializer = NULL;
> + task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);
>  
>   task_set(&ifq->ifq_start, ifq_start_task, ifq);
>   task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
> @@ -239,6 +269,8 @@ void
>  ifq_destroy(struct ifqueue *ifq)
>  {
>   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
> +
> + ifq_barrier(ifq); /* ensure nothing is running with the ifq */
>  
>   /* don't need to lock because this is the last use of the ifq */
>  
> Index: sys/net/ifq.h
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.h,v
> retrieving revision 1.13
> diff -u -p -r1.13 ifq.h
> --- sys/net/ifq.h 3 May 2017 20:55:29 -0000 1.13
> +++ sys/net/ifq.h 10 Nov 2017 00:45:41 -0000
> @@ -25,6 +25,7 @@ struct ifq_ops;
>  
>  struct ifqueue {
>   struct ifnet *ifq_if;
> + struct taskq *ifq_softnet;
>   union {
>   void *_ifq_softc;
>   /*
> @@ -57,6 +58,7 @@ struct ifqueue {
>   struct mutex ifq_task_mtx;
>   struct task_list ifq_task_list;
>   void *ifq_serializer;
> + struct task ifq_bundle;
>  
>   /* work to be serialised */
>   struct task ifq_start;
> @@ -378,6 +380,7 @@ void ifq_init(struct ifqueue *, struct
>  void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
>  void ifq_destroy(struct ifqueue *);
>  int ifq_enqueue(struct ifqueue *, struct mbuf *);
> +void ifq_start(struct ifqueue *);
>  struct mbuf *ifq_deq_begin(struct ifqueue *);
>  void ifq_deq_commit(struct ifqueue *, struct mbuf *);
>  void ifq_deq_rollback(struct ifqueue *, struct mbuf *);
> @@ -411,12 +414,6 @@ static inline unsigned int
>  ifq_is_oactive(struct ifqueue *ifq)
>  {
>   return (ifq->ifq_oactive);
> -}
> -
> -static inline void
> -ifq_start(struct ifqueue *ifq)
> -{
> - ifq_serialize(ifq, &ifq->ifq_start);
>  }
>  
>  static inline void
>

--
:wq Claudio

Reply | Threaded
Open this post in threaded view
|

Re: try to bundle multiple packets on an ifq before sending

Martin Pieuchot
In reply to this post by David Gwynne-5
On 10/11/17(Fri) 10:58, David Gwynne wrote:
> this makes ifq_start try to wait for 4 packets before calling
> if->if_qstart.

So you're adding back the IFXF_TXREADY mechanism that you removed in
2015 in r1.418.  At that time we couldn't clearly see how to make it
MP-safe.

> this is based on work sephe did in dragonflybsd, and described in
> a comment in their sys/net/if.c. there's a link to it here:
> https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/sys/net/if.c#L2976-L2987
>
> the tests we've done generally show a performance bump, but otherwise
> no degradation in performance.
>
> the mechanism for bundling packets is to but schedule a task to
> service the queue later. if 4 packets get accumulated before the
> task runs, it's cancelled and the code runs the start routine
> directly.
>
> the most significant difference this implementation has to the dfly
> one is that our ifqs dont (currently) track the number of bytes on
> the q. dfly sends if it can bundle 4 packets, or up to mtu bytes
> worth of packets. this implementation only looks at the number of
> packets.

IFXF_TXREADY had a different magic.  Instead of 4 packets it looked at:

        min(8, ifp->if_snd.ifq_maxlen)

It also checked for ifq_is_oactive()...  Why is this logic no longer
relevant?

> the taskq the ifq uses is one of the softnets, which is assigned
> when the ifq is initted and unconditionally used during the ifq's
> lifetime.

We're currently using net_tq() to distribute load for incoming packets.
So I believe you should schedule the task on the current taskq or the
first one if coming from userland.

>           because ifq work could now be pending in a softnet taskq,
> ifq_barrier also needs to put a barrier in the taskq. this is
> implemented using taskq_barrier, which i wrote ages ago but didn't
> have a use case for at the time.
>
> tests? ok?
>
> Index: share/man/man9/task_add.9
> ===================================================================
> RCS file: /cvs/src/share/man/man9/task_add.9,v
> retrieving revision 1.16
> diff -u -p -r1.16 task_add.9
> --- share/man/man9/task_add.9 14 Sep 2015 15:14:55 -0000 1.16
> +++ share/man/man9/task_add.9 10 Nov 2017 00:45:41 -0000
> @@ -20,6 +20,7 @@
>  .Sh NAME
>  .Nm taskq_create ,
>  .Nm taskq_destroy ,
> +.Nm taskq_barrier ,
>  .Nm task_set ,
>  .Nm task_add ,
>  .Nm task_del ,
> @@ -37,6 +38,8 @@
>  .Ft void
>  .Fn taskq_destroy "struct taskq *tq"
>  .Ft void
> +.Fn taskq_barrier "struct taskq *tq"
> +.Ft void
>  .Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg"
>  .Ft int
>  .Fn task_add "struct taskq *tq" "struct task *t"
> @@ -88,6 +91,15 @@ Calling
>  against the system taskq is an error and will lead to undefined
>  behaviour or a system fault.
>  .Pp
> +.Fn taskq_barrier
> +guarantees that any task that was running on the
> +.Fa tq
> +taskq when the barrier was called has finished by the time the barrier
> +returns.
> +.Fn taskq_barrier
> +is only supported on taskqs serviced by 1 thread,
> +and may not be called by a task running in the specified taskq.
> +.Pp
>  It is the responsibility of the caller to provide the
>  .Fn task_set ,
>  .Fn task_add ,
> @@ -163,6 +175,8 @@ argument given in
>  and
>  .Fn taskq_destroy
>  can be called during autoconf, or from process context.
> +.Fn taskq_barrier
> +can be called from process context.
>  .Fn task_set ,
>  .Fn task_add ,
>  and
> Index: sys/sys/task.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/task.h,v
> retrieving revision 1.11
> diff -u -p -r1.11 task.h
> --- sys/sys/task.h 7 Jun 2016 07:53:33 -0000 1.11
> +++ sys/sys/task.h 10 Nov 2017 00:45:41 -0000
> @@ -43,6 +43,7 @@ extern struct taskq *const systqmp;
>  
>  struct taskq *taskq_create(const char *, unsigned int, int, unsigned int);
>  void taskq_destroy(struct taskq *);
> +void taskq_barrier(struct taskq *);
>  
>  void task_set(struct task *, void (*)(void *), void *);
>  int task_add(struct taskq *, struct task *);
> Index: sys/kern/kern_task.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_task.c,v
> retrieving revision 1.20
> diff -u -p -r1.20 kern_task.c
> --- sys/kern/kern_task.c 30 Oct 2017 14:01:42 -0000 1.20
> +++ sys/kern/kern_task.c 10 Nov 2017 00:45:41 -0000
> @@ -22,6 +22,7 @@
>  #include <sys/mutex.h>
>  #include <sys/kthread.h>
>  #include <sys/task.h>
> +#include <sys/proc.h>
>  
>  #define TASK_ONQUEUE 1
>  
> @@ -68,6 +69,7 @@ struct taskq *const systqmp = &taskq_sys
>  
>  void taskq_init(void); /* called in init_main.c */
>  void taskq_create_thread(void *);
> +void taskq_barrier_task(void *);
>  int taskq_sleep(const volatile void *, struct mutex *, int,
>      const char *, int);
>  int taskq_next_work(struct taskq *, struct task *, sleepfn);
> @@ -176,6 +178,30 @@ taskq_create_thread(void *arg)
>   } while (tq->tq_running < tq->tq_nthreads);
>  
>   mtx_leave(&tq->tq_mtx);
> +}
> +
> +void
> +taskq_barrier(struct taskq *tq)
> +{
> + struct sleep_state sls;
> + unsigned int notdone = 1;
> + struct task t = TASK_INITIALIZER(taskq_barrier_task, &notdone);
> +
> + task_add(tq, &t);
> +
> + while (notdone) {
> + sleep_setup(&sls, &notdone, PWAIT, "tqbar");
> + sleep_finish(&sls, notdone);
> + }
> +}
> +
> +void
> +taskq_barrier_task(void *p)
> +{
> + unsigned int *notdone = p;
> +
> + *notdone = 0;
> + wakeup_one(notdone);
>  }
>  
>  void
> Index: sys/net/ifq.c
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.c,v
> retrieving revision 1.12
> diff -u -p -r1.12 ifq.c
> --- sys/net/ifq.c 2 Jun 2017 00:07:12 -0000 1.12
> +++ sys/net/ifq.c 10 Nov 2017 00:45:41 -0000
> @@ -64,9 +64,16 @@ struct priq {
>  void ifq_start_task(void *);
>  void ifq_restart_task(void *);
>  void ifq_barrier_task(void *);
> +void ifq_bundle_task(void *);
>  
>  #define TASK_ONQUEUE 0x1
>  
> +static inline void
> +ifq_run_start(struct ifqueue *ifq)
> +{
> + ifq_serialize(ifq, &ifq->ifq_start);
> +}
> +
>  void
>  ifq_serialize(struct ifqueue *ifq, struct task *t)
>  {
> @@ -108,6 +115,16 @@ ifq_is_serialized(struct ifqueue *ifq)
>  }
>  
>  void
> +ifq_start(struct ifqueue *ifq)
> +{
> + if (ifq_len(ifq) >= 4) {
> + task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
> + ifq_run_start(ifq);
> + } else
> + task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
> +}
> +
> +void
>  ifq_start_task(void *p)
>  {
>   struct ifqueue *ifq = p;
> @@ -131,6 +148,14 @@ ifq_restart_task(void *p)
>  }
>  
>  void
> +ifq_bundle_task(void *p)
> +{
> + struct ifqueue *ifq = p;
> +
> + ifq_run_start(ifq);
> +}
> +
> +void
>  ifq_barrier(struct ifqueue *ifq)
>  {
>   struct sleep_state sls;
> @@ -140,6 +165,9 @@ ifq_barrier(struct ifqueue *ifq)
>   /* this should only be called from converted drivers */
>   KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));
>  
> + if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
> + taskq_barrier(ifq->ifq_softnet);
> +
>   if (ifq->ifq_serializer == NULL)
>   return;
>  
> @@ -168,6 +196,7 @@ void
>  ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
>  {
>   ifq->ifq_if = ifp;
> + ifq->ifq_softnet = net_tq(ifp->if_index + idx);
>   ifq->ifq_softc = NULL;
>  
>   mtx_init(&ifq->ifq_mtx, IPL_NET);
> @@ -189,6 +218,7 @@ ifq_init(struct ifqueue *ifq, struct ifn
>   mtx_init(&ifq->ifq_task_mtx, IPL_NET);
>   TAILQ_INIT(&ifq->ifq_task_list);
>   ifq->ifq_serializer = NULL;
> + task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);
>  
>   task_set(&ifq->ifq_start, ifq_start_task, ifq);
>   task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
> @@ -239,6 +269,8 @@ void
>  ifq_destroy(struct ifqueue *ifq)
>  {
>   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
> +
> + ifq_barrier(ifq); /* ensure nothing is running with the ifq */
>  
>   /* don't need to lock because this is the last use of the ifq */
>  
> Index: sys/net/ifq.h
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.h,v
> retrieving revision 1.13
> diff -u -p -r1.13 ifq.h
> --- sys/net/ifq.h 3 May 2017 20:55:29 -0000 1.13
> +++ sys/net/ifq.h 10 Nov 2017 00:45:41 -0000
> @@ -25,6 +25,7 @@ struct ifq_ops;
>  
>  struct ifqueue {
>   struct ifnet *ifq_if;
> + struct taskq *ifq_softnet;
>   union {
>   void *_ifq_softc;
>   /*
> @@ -57,6 +58,7 @@ struct ifqueue {
>   struct mutex ifq_task_mtx;
>   struct task_list ifq_task_list;
>   void *ifq_serializer;
> + struct task ifq_bundle;
>  
>   /* work to be serialised */
>   struct task ifq_start;
> @@ -378,6 +380,7 @@ void ifq_init(struct ifqueue *, struct
>  void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
>  void ifq_destroy(struct ifqueue *);
>  int ifq_enqueue(struct ifqueue *, struct mbuf *);
> +void ifq_start(struct ifqueue *);
>  struct mbuf *ifq_deq_begin(struct ifqueue *);
>  void ifq_deq_commit(struct ifqueue *, struct mbuf *);
>  void ifq_deq_rollback(struct ifqueue *, struct mbuf *);
> @@ -411,12 +414,6 @@ static inline unsigned int
>  ifq_is_oactive(struct ifqueue *ifq)
>  {
>   return (ifq->ifq_oactive);
> -}
> -
> -static inline void
> -ifq_start(struct ifqueue *ifq)
> -{
> - ifq_serialize(ifq, &ifq->ifq_start);
>  }
>  
>  static inline void
>

Reply | Threaded
Open this post in threaded view
|

Re: try to bundle multiple packets on an ifq before sending

David Gwynne-5
On Sun, Nov 12, 2017 at 02:45:05PM +0100, Martin Pieuchot wrote:
> On 10/11/17(Fri) 10:58, David Gwynne wrote:
> > this makes ifq_start try to wait for 4 packets before calling
> > if->if_qstart.
>
> So you're adding back the IFXF_TXREADY mechanism that you removed in
> 2015 in r1.418.  At that time we couldn't clearly see how to make it
> MP-safe.

ha, id forgotten about that :)

>
> > this is based on work sephe did in dragonflybsd, and described in
> > a comment in their sys/net/if.c. there's a link to it here:
> > https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/sys/net/if.c#L2976-L2987
> >
> > the tests we've done generally show a performance bump, but otherwise
> > no degradation in performance.
> >
> > the mechanism for bundling packets is to but schedule a task to
> > service the queue later. if 4 packets get accumulated before the
> > task runs, it's cancelled and the code runs the start routine
> > directly.
> >
> > the most significant difference this implementation has to the dfly
> > one is that our ifqs dont (currently) track the number of bytes on
> > the q. dfly sends if it can bundle 4 packets, or up to mtu bytes
> > worth of packets. this implementation only looks at the number of
> > packets.
>
> IFXF_TXREADY had a different magic.  Instead of 4 packets it looked at:
>
> min(8, ifp->if_snd.ifq_maxlen)

8 was magic, i picked it because it was a nice round number. dfly
uses 4 because sephe tested a bunch of values and didnt see an
improvement beyond 4.

> It also checked for ifq_is_oactive()...  Why is this logic no longer
> relevant?

ifq_start_task called via ifq_run_start (which in is called via
ifq_start or ifq_bundle_task) checks ifq_is_oactive. doing it once
should be enough.

>
> > the taskq the ifq uses is one of the softnets, which is assigned
> > when the ifq is initted and unconditionally used during the ifq's
> > lifetime.
>
> We're currently using net_tq() to distribute load for incoming packets.
> So I believe you should schedule the task on the current taskq or the
> first one if coming from userland.

/em shrug.

i dont know how to tell what the current softnet tq is. would it
be enough to simply not mix if the ifq_idx into the argument to
net_tq?

>
> >           because ifq work could now be pending in a softnet taskq,
> > ifq_barrier also needs to put a barrier in the taskq. this is
> > implemented using taskq_barrier, which i wrote ages ago but didn't
> > have a use case for at the time.

both hrvoje and i hit a deadlock when downing an interface. if
softnet (or softnets) are waiting on the netlock that an ioctl
path holds, then an ifq_barrier in that ioctl path will sleep forever
waiting for a task to run in softnets that are waiting for the
netlock.

to mitigate this ive added code like what uvm has. thoughts?

Index: share/man/man9/task_add.9
===================================================================
RCS file: /cvs/src/share/man/man9/task_add.9,v
retrieving revision 1.16
diff -u -p -r1.16 task_add.9
--- share/man/man9/task_add.9 14 Sep 2015 15:14:55 -0000 1.16
+++ share/man/man9/task_add.9 13 Nov 2017 00:46:17 -0000
@@ -20,6 +20,7 @@
 .Sh NAME
 .Nm taskq_create ,
 .Nm taskq_destroy ,
+.Nm taskq_barrier ,
 .Nm task_set ,
 .Nm task_add ,
 .Nm task_del ,
@@ -37,6 +38,8 @@
 .Ft void
 .Fn taskq_destroy "struct taskq *tq"
 .Ft void
+.Fn taskq_barrier "struct taskq *tq"
+.Ft void
 .Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg"
 .Ft int
 .Fn task_add "struct taskq *tq" "struct task *t"
@@ -88,6 +91,15 @@ Calling
 against the system taskq is an error and will lead to undefined
 behaviour or a system fault.
 .Pp
+.Fn taskq_barrier
+guarantees that any task that was running on the
+.Fa tq
+taskq when the barrier was called has finished by the time the barrier
+returns.
+.Fn taskq_barrier
+is only supported on taskqs serviced by 1 thread,
+and may not be called by a task running in the specified taskq.
+.Pp
 It is the responsibility of the caller to provide the
 .Fn task_set ,
 .Fn task_add ,
@@ -163,6 +175,8 @@ argument given in
 and
 .Fn taskq_destroy
 can be called during autoconf, or from process context.
+.Fn taskq_barrier
+can be called from process context.
 .Fn task_set ,
 .Fn task_add ,
 and
Index: sys/sys/task.h
===================================================================
RCS file: /cvs/src/sys/sys/task.h,v
retrieving revision 1.11
diff -u -p -r1.11 task.h
--- sys/sys/task.h 7 Jun 2016 07:53:33 -0000 1.11
+++ sys/sys/task.h 13 Nov 2017 00:46:17 -0000
@@ -43,6 +43,7 @@ extern struct taskq *const systqmp;
 
 struct taskq *taskq_create(const char *, unsigned int, int, unsigned int);
 void taskq_destroy(struct taskq *);
+void taskq_barrier(struct taskq *);
 
 void task_set(struct task *, void (*)(void *), void *);
 int task_add(struct taskq *, struct task *);
Index: sys/kern/kern_task.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_task.c,v
retrieving revision 1.20
diff -u -p -r1.20 kern_task.c
--- sys/kern/kern_task.c 30 Oct 2017 14:01:42 -0000 1.20
+++ sys/kern/kern_task.c 13 Nov 2017 00:46:17 -0000
@@ -22,6 +22,7 @@
 #include <sys/mutex.h>
 #include <sys/kthread.h>
 #include <sys/task.h>
+#include <sys/proc.h>
 
 #define TASK_ONQUEUE 1
 
@@ -68,6 +69,7 @@ struct taskq *const systqmp = &taskq_sys
 
 void taskq_init(void); /* called in init_main.c */
 void taskq_create_thread(void *);
+void taskq_barrier_task(void *);
 int taskq_sleep(const volatile void *, struct mutex *, int,
     const char *, int);
 int taskq_next_work(struct taskq *, struct task *, sleepfn);
@@ -176,6 +178,30 @@ taskq_create_thread(void *arg)
  } while (tq->tq_running < tq->tq_nthreads);
 
  mtx_leave(&tq->tq_mtx);
+}
+
+void
+taskq_barrier(struct taskq *tq)
+{
+ struct sleep_state sls;
+ unsigned int notdone = 1;
+ struct task t = TASK_INITIALIZER(taskq_barrier_task, &notdone);
+
+ task_add(tq, &t);
+
+ while (notdone) {
+ sleep_setup(&sls, &notdone, PWAIT, "tqbar");
+ sleep_finish(&sls, notdone);
+ }
+}
+
+void
+taskq_barrier_task(void *p)
+{
+ unsigned int *notdone = p;
+
+ *notdone = 0;
+ wakeup_one(notdone);
 }
 
 void
Index: sys/net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
retrieving revision 1.12
diff -u -p -r1.12 ifq.c
--- sys/net/ifq.c 2 Jun 2017 00:07:12 -0000 1.12
+++ sys/net/ifq.c 13 Nov 2017 00:46:17 -0000
@@ -64,9 +64,16 @@ struct priq {
 void ifq_start_task(void *);
 void ifq_restart_task(void *);
 void ifq_barrier_task(void *);
+void ifq_bundle_task(void *);
 
 #define TASK_ONQUEUE 0x1
 
+static inline void
+ifq_run_start(struct ifqueue *ifq)
+{
+ ifq_serialize(ifq, &ifq->ifq_start);
+}
+
 void
 ifq_serialize(struct ifqueue *ifq, struct task *t)
 {
@@ -108,6 +115,16 @@ ifq_is_serialized(struct ifqueue *ifq)
 }
 
 void
+ifq_start(struct ifqueue *ifq)
+{
+ if (ifq_len(ifq) >= 4) {
+ task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
+ ifq_run_start(ifq);
+        } else
+                task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
+}
+
+void
 ifq_start_task(void *p)
 {
  struct ifqueue *ifq = p;
@@ -131,6 +148,14 @@ ifq_restart_task(void *p)
 }
 
 void
+ifq_bundle_task(void *p)
+{
+ struct ifqueue *ifq = p;
+
+ ifq_run_start(ifq);
+}
+
+void
 ifq_barrier(struct ifqueue *ifq)
 {
  struct sleep_state sls;
@@ -140,6 +165,18 @@ ifq_barrier(struct ifqueue *ifq)
  /* this should only be called from converted drivers */
  KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));
 
+ if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) {
+ int netlocked = (rw_status(&netlock) == RW_WRITE);
+
+ if (netlocked)
+ NET_UNLOCK();
+
+ taskq_barrier(ifq->ifq_softnet);
+
+ if (netlocked)
+ NET_LOCK();
+ }
+
  if (ifq->ifq_serializer == NULL)
  return;
 
@@ -168,6 +205,7 @@ void
 ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
 {
  ifq->ifq_if = ifp;
+ ifq->ifq_softnet = net_tq(ifp->if_index);
  ifq->ifq_softc = NULL;
 
  mtx_init(&ifq->ifq_mtx, IPL_NET);
@@ -189,6 +227,7 @@ ifq_init(struct ifqueue *ifq, struct ifn
  mtx_init(&ifq->ifq_task_mtx, IPL_NET);
  TAILQ_INIT(&ifq->ifq_task_list);
  ifq->ifq_serializer = NULL;
+ task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);
 
  task_set(&ifq->ifq_start, ifq_start_task, ifq);
  task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
@@ -239,6 +278,8 @@ void
 ifq_destroy(struct ifqueue *ifq)
 {
  struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+
+ ifq_barrier(ifq); /* ensure nothing is running with the ifq */
 
  /* don't need to lock because this is the last use of the ifq */
 
Index: sys/net/ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
retrieving revision 1.13
diff -u -p -r1.13 ifq.h
--- sys/net/ifq.h 3 May 2017 20:55:29 -0000 1.13
+++ sys/net/ifq.h 13 Nov 2017 00:46:17 -0000
@@ -25,6 +25,7 @@ struct ifq_ops;
 
 struct ifqueue {
  struct ifnet *ifq_if;
+ struct taskq *ifq_softnet;
  union {
  void *_ifq_softc;
  /*
@@ -57,6 +58,7 @@ struct ifqueue {
  struct mutex ifq_task_mtx;
  struct task_list ifq_task_list;
  void *ifq_serializer;
+ struct task ifq_bundle;
 
  /* work to be serialised */
  struct task ifq_start;
@@ -378,6 +380,7 @@ void ifq_init(struct ifqueue *, struct
 void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
 void ifq_destroy(struct ifqueue *);
 int ifq_enqueue(struct ifqueue *, struct mbuf *);
+void ifq_start(struct ifqueue *);
 struct mbuf *ifq_deq_begin(struct ifqueue *);
 void ifq_deq_commit(struct ifqueue *, struct mbuf *);
 void ifq_deq_rollback(struct ifqueue *, struct mbuf *);
@@ -411,12 +414,6 @@ static inline unsigned int
 ifq_is_oactive(struct ifqueue *ifq)
 {
  return (ifq->ifq_oactive);
-}
-
-static inline void
-ifq_start(struct ifqueue *ifq)
-{
- ifq_serialize(ifq, &ifq->ifq_start);
 }
 
 static inline void

Reply | Threaded
Open this post in threaded view
|

Re: try to bundle multiple packets on an ifq before sending

Martin Pieuchot
On 13/11/17(Mon) 10:56, David Gwynne wrote:
> On Sun, Nov 12, 2017 at 02:45:05PM +0100, Martin Pieuchot wrote:
> > [...]
> > We're currently using net_tq() to distribute load for incoming packets.
> > So I believe you should schedule the task on the current taskq or the
> > first one if coming from userland.
>
> /em shrug.
>
> i dont know how to tell what the current softnet tq is.

That's something we need to know.  This will allow to have per taskq
data structure.

>                                                         would it
> be enough to simply not mix if the ifq_idx into the argument to
> net_tq?

Maybe.  If a CPU processing packets from myx0 in softnet0 wants to
send forward via myx1, using net_tq() like you do it now will pick
softnet1.
My suggestion was to keep softnet1 busy with processing packets coming
from myx1 and enqueue the task on softnet0.  This way no inter CPU
communication is needed.
But without testing we can't tell if it's more efficient the other way.

> > >           because ifq work could now be pending in a softnet taskq,
> > > ifq_barrier also needs to put a barrier in the taskq. this is
> > > implemented using taskq_barrier, which i wrote ages ago but didn't
> > > have a use case for at the time.
>
> both hrvoje and i hit a deadlock when downing an interface. if
> softnet (or softnets) are waiting on the netlock that and ioctl
> path holds, then an ifq_barrier in that ioctl path will sleep forever
> waiting for a task to run in softnets that are waiting for the
> netlock.
>
> to mitigate this ive added code like what uvm has. thoughts?

That's only called in ioctl(2) path, right?  Which commands end up
there?

The problem with driver *_ioctl() routines is that pseudo-driver need
the NET_LOCK() while the others should not.  So I'd rather see the
NET_LOCK() released before calling ifp->if_ioctl().

> Index: share/man/man9/task_add.9
> ===================================================================
> RCS file: /cvs/src/share/man/man9/task_add.9,v
> retrieving revision 1.16
> diff -u -p -r1.16 task_add.9
> --- share/man/man9/task_add.9 14 Sep 2015 15:14:55 -0000 1.16
> +++ share/man/man9/task_add.9 13 Nov 2017 00:46:17 -0000
> @@ -20,6 +20,7 @@
>  .Sh NAME
>  .Nm taskq_create ,
>  .Nm taskq_destroy ,
> +.Nm taskq_barrier ,
>  .Nm task_set ,
>  .Nm task_add ,
>  .Nm task_del ,
> @@ -37,6 +38,8 @@
>  .Ft void
>  .Fn taskq_destroy "struct taskq *tq"
>  .Ft void
> +.Fn taskq_barrier "struct taskq *tq"
> +.Ft void
>  .Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg"
>  .Ft int
>  .Fn task_add "struct taskq *tq" "struct task *t"
> @@ -88,6 +91,15 @@ Calling
>  against the system taskq is an error and will lead to undefined
>  behaviour or a system fault.
>  .Pp
> +.Fn taskq_barrier
> +guarantees that any task that was running on the
> +.Fa tq
> +taskq when the barrier was called has finished by the time the barrier
> +returns.
> +.Fn taskq_barrier
> +is only supported on taskqs serviced by 1 thread,
> +and may not be called by a task running in the specified taskq.
> +.Pp
>  It is the responsibility of the caller to provide the
>  .Fn task_set ,
>  .Fn task_add ,
> @@ -163,6 +175,8 @@ argument given in
>  and
>  .Fn taskq_destroy
>  can be called during autoconf, or from process context.
> +.Fn taskq_barrier
> +can be called from process context.
>  .Fn task_set ,
>  .Fn task_add ,
>  and
> Index: sys/sys/task.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/task.h,v
> retrieving revision 1.11
> diff -u -p -r1.11 task.h
> --- sys/sys/task.h 7 Jun 2016 07:53:33 -0000 1.11
> +++ sys/sys/task.h 13 Nov 2017 00:46:17 -0000
> @@ -43,6 +43,7 @@ extern struct taskq *const systqmp;
>  
>  struct taskq *taskq_create(const char *, unsigned int, int, unsigned int);
>  void taskq_destroy(struct taskq *);
> +void taskq_barrier(struct taskq *);
>  
>  void task_set(struct task *, void (*)(void *), void *);
>  int task_add(struct taskq *, struct task *);
> Index: sys/kern/kern_task.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_task.c,v
> retrieving revision 1.20
> diff -u -p -r1.20 kern_task.c
> --- sys/kern/kern_task.c 30 Oct 2017 14:01:42 -0000 1.20
> +++ sys/kern/kern_task.c 13 Nov 2017 00:46:17 -0000
> @@ -22,6 +22,7 @@
>  #include <sys/mutex.h>
>  #include <sys/kthread.h>
>  #include <sys/task.h>
> +#include <sys/proc.h>
>  
>  #define TASK_ONQUEUE 1
>  
> @@ -68,6 +69,7 @@ struct taskq *const systqmp = &taskq_sys
>  
>  void taskq_init(void); /* called in init_main.c */
>  void taskq_create_thread(void *);
> +void taskq_barrier_task(void *);
>  int taskq_sleep(const volatile void *, struct mutex *, int,
>      const char *, int);
>  int taskq_next_work(struct taskq *, struct task *, sleepfn);
> @@ -176,6 +178,30 @@ taskq_create_thread(void *arg)
>   } while (tq->tq_running < tq->tq_nthreads);
>  
>   mtx_leave(&tq->tq_mtx);
> +}
> +
> +void
> +taskq_barrier(struct taskq *tq)
> +{
> + struct sleep_state sls;
> + unsigned int notdone = 1;
> + struct task t = TASK_INITIALIZER(taskq_barrier_task, &notdone);
> +
> + task_add(tq, &t);
> +
> + while (notdone) {
> + sleep_setup(&sls, &notdone, PWAIT, "tqbar");
> + sleep_finish(&sls, notdone);
> + }
> +}
> +
> +void
> +taskq_barrier_task(void *p)
> +{
> + unsigned int *notdone = p;
> +
> + *notdone = 0;
> + wakeup_one(notdone);
>  }
>  
>  void
> Index: sys/net/ifq.c
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.c,v
> retrieving revision 1.12
> diff -u -p -r1.12 ifq.c
> --- sys/net/ifq.c 2 Jun 2017 00:07:12 -0000 1.12
> +++ sys/net/ifq.c 13 Nov 2017 00:46:17 -0000
> @@ -64,9 +64,16 @@ struct priq {
>  void ifq_start_task(void *);
>  void ifq_restart_task(void *);
>  void ifq_barrier_task(void *);
> +void ifq_bundle_task(void *);
>  
>  #define TASK_ONQUEUE 0x1
>  
> +static inline void
> +ifq_run_start(struct ifqueue *ifq)
> +{
> + ifq_serialize(ifq, &ifq->ifq_start);
> +}
> +
>  void
>  ifq_serialize(struct ifqueue *ifq, struct task *t)
>  {
> @@ -108,6 +115,16 @@ ifq_is_serialized(struct ifqueue *ifq)
>  }
>  
>  void
> +ifq_start(struct ifqueue *ifq)
> +{
> + if (ifq_len(ifq) >= 4) {
> + task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
> + ifq_run_start(ifq);
> +        } else
> +                task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
> +}
> +
> +void
>  ifq_start_task(void *p)
>  {
>   struct ifqueue *ifq = p;
> @@ -131,6 +148,14 @@ ifq_restart_task(void *p)
>  }
>  
>  void
> +ifq_bundle_task(void *p)
> +{
> + struct ifqueue *ifq = p;
> +
> + ifq_run_start(ifq);
> +}
> +
> +void
>  ifq_barrier(struct ifqueue *ifq)
>  {
>   struct sleep_state sls;
> @@ -140,6 +165,18 @@ ifq_barrier(struct ifqueue *ifq)
>   /* this should only be called from converted drivers */
>   KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));
>  
> + if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) {
> + int netlocked = (rw_status(&netlock) == RW_WRITE);
> +
> + if (netlocked)
> + NET_UNLOCK();
> +
> + taskq_barrier(ifq->ifq_softnet);
> +
> + if (netlocked)
> + NET_LOCK();
> + }
> +
>   if (ifq->ifq_serializer == NULL)
>   return;
>  
> @@ -168,6 +205,7 @@ void
>  ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
>  {
>   ifq->ifq_if = ifp;
> + ifq->ifq_softnet = net_tq(ifp->if_index);
>   ifq->ifq_softc = NULL;
>  
>   mtx_init(&ifq->ifq_mtx, IPL_NET);
> @@ -189,6 +227,7 @@ ifq_init(struct ifqueue *ifq, struct ifn
>   mtx_init(&ifq->ifq_task_mtx, IPL_NET);
>   TAILQ_INIT(&ifq->ifq_task_list);
>   ifq->ifq_serializer = NULL;
> + task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);
>  
>   task_set(&ifq->ifq_start, ifq_start_task, ifq);
>   task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
> @@ -239,6 +278,8 @@ void
>  ifq_destroy(struct ifqueue *ifq)
>  {
>   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
> +
> + ifq_barrier(ifq); /* ensure nothing is running with the ifq */
>  
>   /* don't need to lock because this is the last use of the ifq */
>  
> Index: sys/net/ifq.h
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.h,v
> retrieving revision 1.13
> diff -u -p -r1.13 ifq.h
> --- sys/net/ifq.h 3 May 2017 20:55:29 -0000 1.13
> +++ sys/net/ifq.h 13 Nov 2017 00:46:17 -0000
> @@ -25,6 +25,7 @@ struct ifq_ops;
>  
>  struct ifqueue {
>   struct ifnet *ifq_if;
> + struct taskq *ifq_softnet;
>   union {
>   void *_ifq_softc;
>   /*
> @@ -57,6 +58,7 @@ struct ifqueue {
>   struct mutex ifq_task_mtx;
>   struct task_list ifq_task_list;
>   void *ifq_serializer;
> + struct task ifq_bundle;
>  
>   /* work to be serialised */
>   struct task ifq_start;
> @@ -378,6 +380,7 @@ void ifq_init(struct ifqueue *, struct
>  void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
>  void ifq_destroy(struct ifqueue *);
>  int ifq_enqueue(struct ifqueue *, struct mbuf *);
> +void ifq_start(struct ifqueue *);
>  struct mbuf *ifq_deq_begin(struct ifqueue *);
>  void ifq_deq_commit(struct ifqueue *, struct mbuf *);
>  void ifq_deq_rollback(struct ifqueue *, struct mbuf *);
> @@ -411,12 +414,6 @@ static inline unsigned int
>  ifq_is_oactive(struct ifqueue *ifq)
>  {
>   return (ifq->ifq_oactive);
> -}
> -
> -static inline void
> -ifq_start(struct ifqueue *ifq)
> -{
> - ifq_serialize(ifq, &ifq->ifq_start);
>  }
>  
>  static inline void
>

Reply | Threaded
Open this post in threaded view
|

Re: try to bundle multiple packets on an ifq before sending

David Gwynne-5

> On 13 Nov 2017, at 5:37 pm, Martin Pieuchot <[hidden email]> wrote:
>
> On 13/11/17(Mon) 10:56, David Gwynne wrote:
>> On Sun, Nov 12, 2017 at 02:45:05PM +0100, Martin Pieuchot wrote:
>>> [...]
>>> We're currently using net_tq() to distribute load for incoming packets.
>>> So I believe you should schedule the task on the current taskq or the
>>> first one if coming from userland.
>>
>> /em shrug.
>>
>> i dont know how to tell what the current softnet tq is.
>
> That's something we need to know.  This will allow to have per taskq
> data structure.

maybe we could use curproc()->p_spare as an index into the array of softnets.

>
>>                                                        would it
>> be enough to simply not mix if the ifq_idx into the argument to
>> net_tq?
>
> Maybe.  If a CPU processing packets from myx0 in softnet0 and want to
> send forward via myx1, using net_tq() like you do it know will pick
> softnet1.
> My suggestion was to keep softnet1 busy with processing packets coming
> from myx1 and enqueue the task on softnet0.  This way no inter CPU
> communication is needed.
> But without testing we can't tell if it's more efficient the other way.

scheduling the bundled start routine as a task on the current softnet would require each ifq to allocate a task for each softnet, and would probably result in a lot of thrashing if multiple threads are pushing packets onto a ring. one of them would have to remove the work from all of them.

i think this is good enough for now. my tests with 2 softnets mixed in with this diff seemed ok. with 8 it sucked cos there was so much waiting on the netlock, so it's hard to say what the effect of this is.

>
>>>>          because ifq work could now be pending in a softnet taskq,
>>>> ifq_barrier also needs to put a barrier in the taskq. this is
>>>> implemented using taskq_barrier, which i wrote ages ago but didn't
>>>> have a use case for at the time.
>>
>> both hrvoje and i hit a deadlock when downing an interface. if
>> softnet (or softnets) are waiting on the netlock that and ioctl
>> path holds, then an ifq_barrier in that ioctl path will sleep forever
>> waiting for a task to run in softnets that are waiting for the
>> netlock.
>>
>> to mitigate this ive added code like what uvm has. thoughts?
>
> That's only called in ioctl(2) path, right?  Which commands end up
> there?

ifq_barrier is generally called from the drv_stop/drv_down path by mpsafe drivers. if you go ifconfig down is the most obvious. other ioctl paths include those that generate ENETRESET, eg, configuring lladdr. some drivers also call their down routine during its own reconfiguration, or to fix a stuck ring.

this diff also adds a call to ifq_barrier inside ifq_destroy, which is called when an interface is detached.

> The problem with driver *_ioctl() routines is that pseudo-driver need
> the NET_LOCK() while the others should not.  So I'd rather see the
> NET_LOCK() released before calling ifp->if_ioctl().
>
>> Index: share/man/man9/task_add.9
>> ===================================================================
>> RCS file: /cvs/src/share/man/man9/task_add.9,v
>> retrieving revision 1.16
>> diff -u -p -r1.16 task_add.9
>> --- share/man/man9/task_add.9 14 Sep 2015 15:14:55 -0000 1.16
>> +++ share/man/man9/task_add.9 13 Nov 2017 00:46:17 -0000
>> @@ -20,6 +20,7 @@
>> .Sh NAME
>> .Nm taskq_create ,
>> .Nm taskq_destroy ,
>> +.Nm taskq_barrier ,
>> .Nm task_set ,
>> .Nm task_add ,
>> .Nm task_del ,
>> @@ -37,6 +38,8 @@
>> .Ft void
>> .Fn taskq_destroy "struct taskq *tq"
>> .Ft void
>> +.Fn taskq_barrier "struct taskq *tq"
>> +.Ft void
>> .Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg"
>> .Ft int
>> .Fn task_add "struct taskq *tq" "struct task *t"
>> @@ -88,6 +91,15 @@ Calling
>> against the system taskq is an error and will lead to undefined
>> behaviour or a system fault.
>> .Pp
>> +.Fn taskq_barrier
>> +guarantees that any task that was running on the
>> +.Fa tq
>> +taskq when the barrier was called has finished by the time the barrier
>> +returns.
>> +.Fn taskq_barrier
>> +is only supported on taskqs serviced by 1 thread,
>> +and may not be called by a task running in the specified taskq.
>> +.Pp
>> It is the responsibility of the caller to provide the
>> .Fn task_set ,
>> .Fn task_add ,
>> @@ -163,6 +175,8 @@ argument given in
>> and
>> .Fn taskq_destroy
>> can be called during autoconf, or from process context.
>> +.Fn taskq_barrier
>> +can be called from process context.
>> .Fn task_set ,
>> .Fn task_add ,
>> and
>> Index: sys/sys/task.h
>> ===================================================================
>> RCS file: /cvs/src/sys/sys/task.h,v
>> retrieving revision 1.11
>> diff -u -p -r1.11 task.h
>> --- sys/sys/task.h 7 Jun 2016 07:53:33 -0000 1.11
>> +++ sys/sys/task.h 13 Nov 2017 00:46:17 -0000
>> @@ -43,6 +43,7 @@ extern struct taskq *const systqmp;
>>
>> struct taskq *taskq_create(const char *, unsigned int, int, unsigned int);
>> void taskq_destroy(struct taskq *);
>> +void taskq_barrier(struct taskq *);
>>
>> void task_set(struct task *, void (*)(void *), void *);
>> int task_add(struct taskq *, struct task *);
>> Index: sys/kern/kern_task.c
>> ===================================================================
>> RCS file: /cvs/src/sys/kern/kern_task.c,v
>> retrieving revision 1.20
>> diff -u -p -r1.20 kern_task.c
>> --- sys/kern/kern_task.c 30 Oct 2017 14:01:42 -0000 1.20
>> +++ sys/kern/kern_task.c 13 Nov 2017 00:46:17 -0000
>> @@ -22,6 +22,7 @@
>> #include <sys/mutex.h>
>> #include <sys/kthread.h>
>> #include <sys/task.h>
>> +#include <sys/proc.h>
>>
>> #define TASK_ONQUEUE 1
>>
>> @@ -68,6 +69,7 @@ struct taskq *const systqmp = &taskq_sys
>>
>> void taskq_init(void); /* called in init_main.c */
>> void taskq_create_thread(void *);
>> +void taskq_barrier_task(void *);
>> int taskq_sleep(const volatile void *, struct mutex *, int,
>>    const char *, int);
>> int taskq_next_work(struct taskq *, struct task *, sleepfn);
>> @@ -176,6 +178,30 @@ taskq_create_thread(void *arg)
>> } while (tq->tq_running < tq->tq_nthreads);
>>
>> mtx_leave(&tq->tq_mtx);
>> +}
>> +
>> +void
>> +taskq_barrier(struct taskq *tq)
>> +{
>> + struct sleep_state sls;
>> + unsigned int notdone = 1;
>> + struct task t = TASK_INITIALIZER(taskq_barrier_task, &notdone);
>> +
>> + task_add(tq, &t);
>> +
>> + while (notdone) {
>> + sleep_setup(&sls, &notdone, PWAIT, "tqbar");
>> + sleep_finish(&sls, notdone);
>> + }
>> +}
>> +
>> +void
>> +taskq_barrier_task(void *p)
>> +{
>> + unsigned int *notdone = p;
>> +
>> + *notdone = 0;
>> + wakeup_one(notdone);
>> }
>>
>> void
>> Index: sys/net/ifq.c
>> ===================================================================
>> RCS file: /cvs/src/sys/net/ifq.c,v
>> retrieving revision 1.12
>> diff -u -p -r1.12 ifq.c
>> --- sys/net/ifq.c 2 Jun 2017 00:07:12 -0000 1.12
>> +++ sys/net/ifq.c 13 Nov 2017 00:46:17 -0000
>> @@ -64,9 +64,16 @@ struct priq {
>> void ifq_start_task(void *);
>> void ifq_restart_task(void *);
>> void ifq_barrier_task(void *);
>> +void ifq_bundle_task(void *);
>>
>> #define TASK_ONQUEUE 0x1
>>
>> +static inline void
>> +ifq_run_start(struct ifqueue *ifq)
>> +{
>> + ifq_serialize(ifq, &ifq->ifq_start);
>> +}
>> +
>> void
>> ifq_serialize(struct ifqueue *ifq, struct task *t)
>> {
>> @@ -108,6 +115,16 @@ ifq_is_serialized(struct ifqueue *ifq)
>> }
>>
>> void
>> +ifq_start(struct ifqueue *ifq)
>> +{
>> + if (ifq_len(ifq) >= 4) {
>> + task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
>> + ifq_run_start(ifq);
>> +        } else
>> +                task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
>> +}
>> +
>> +void
>> ifq_start_task(void *p)
>> {
>> struct ifqueue *ifq = p;
>> @@ -131,6 +148,14 @@ ifq_restart_task(void *p)
>> }
>>
>> void
>> +ifq_bundle_task(void *p)
>> +{
>> + struct ifqueue *ifq = p;
>> +
>> + ifq_run_start(ifq);
>> +}
>> +
>> +void
>> ifq_barrier(struct ifqueue *ifq)
>> {
>> struct sleep_state sls;
>> @@ -140,6 +165,18 @@ ifq_barrier(struct ifqueue *ifq)
>> /* this should only be called from converted drivers */
>> KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));
>>
>> + if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) {
>> + int netlocked = (rw_status(&netlock) == RW_WRITE);
>> +
>> + if (netlocked)
>> + NET_UNLOCK();
>> +
>> + taskq_barrier(ifq->ifq_softnet);
>> +
>> + if (netlocked)
>> + NET_LOCK();
>> + }
>> +
>> if (ifq->ifq_serializer == NULL)
>> return;
>>
>> @@ -168,6 +205,7 @@ void
>> ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
>> {
>> ifq->ifq_if = ifp;
>> + ifq->ifq_softnet = net_tq(ifp->if_index);
>> ifq->ifq_softc = NULL;
>>
>> mtx_init(&ifq->ifq_mtx, IPL_NET);
>> @@ -189,6 +227,7 @@ ifq_init(struct ifqueue *ifq, struct ifn
>> mtx_init(&ifq->ifq_task_mtx, IPL_NET);
>> TAILQ_INIT(&ifq->ifq_task_list);
>> ifq->ifq_serializer = NULL;
>> + task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);
>>
>> task_set(&ifq->ifq_start, ifq_start_task, ifq);
>> task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
>> @@ -239,6 +278,8 @@ void
>> ifq_destroy(struct ifqueue *ifq)
>> {
>> struct mbuf_list ml = MBUF_LIST_INITIALIZER();
>> +
>> + ifq_barrier(ifq); /* ensure nothing is running with the ifq */
>>
>> /* don't need to lock because this is the last use of the ifq */
>>
>> Index: sys/net/ifq.h
>> ===================================================================
>> RCS file: /cvs/src/sys/net/ifq.h,v
>> retrieving revision 1.13
>> diff -u -p -r1.13 ifq.h
>> --- sys/net/ifq.h 3 May 2017 20:55:29 -0000 1.13
>> +++ sys/net/ifq.h 13 Nov 2017 00:46:17 -0000
>> @@ -25,6 +25,7 @@ struct ifq_ops;
>>
>> struct ifqueue {
>> struct ifnet *ifq_if;
>> + struct taskq *ifq_softnet;
>> union {
>> void *_ifq_softc;
>> /*
>> @@ -57,6 +58,7 @@ struct ifqueue {
>> struct mutex ifq_task_mtx;
>> struct task_list ifq_task_list;
>> void *ifq_serializer;
>> + struct task ifq_bundle;
>>
>> /* work to be serialised */
>> struct task ifq_start;
>> @@ -378,6 +380,7 @@ void ifq_init(struct ifqueue *, struct
>> void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
>> void ifq_destroy(struct ifqueue *);
>> int ifq_enqueue(struct ifqueue *, struct mbuf *);
>> +void ifq_start(struct ifqueue *);
>> struct mbuf *ifq_deq_begin(struct ifqueue *);
>> void ifq_deq_commit(struct ifqueue *, struct mbuf *);
>> void ifq_deq_rollback(struct ifqueue *, struct mbuf *);
>> @@ -411,12 +414,6 @@ static inline unsigned int
>> ifq_is_oactive(struct ifqueue *ifq)
>> {
>> return (ifq->ifq_oactive);
>> -}
>> -
>> -static inline void
>> -ifq_start(struct ifqueue *ifq)
>> -{
>> - ifq_serialize(ifq, &ifq->ifq_start);
>> }
>>
>> static inline void
>>

Reply | Threaded
Open this post in threaded view
|

Re: try to bundle multiple packets on an ifq before sending

Martin Pieuchot
On 13/11/17(Mon) 20:28, David Gwynne wrote:

>
> > On 13 Nov 2017, at 5:37 pm, Martin Pieuchot <[hidden email]> wrote:
> >
> > On 13/11/17(Mon) 10:56, David Gwynne wrote:
> >> On Sun, Nov 12, 2017 at 02:45:05PM +0100, Martin Pieuchot wrote:
> >>> [...]
> >>> We're currently using net_tq() to distribute load for incoming packets.
> >>> So I believe you should schedule the task on the current taskq or the
> >>> first one if coming from userland.
> >>
> >> /em shrug.
> >>
> >> i dont know how to tell what the current softnet tq is.
> >
> > That's something we need to know.  This will allow to have per taskq
> > data structure.
>
> maybe we could use curproc()->p_spare as an index into the array of softnets.

It would be handy.

> >> be enough to simply not mix if the ifq_idx into the argument to
> >> net_tq?
> >
> > Maybe.  If a CPU processing packets from myx0 in softnet0 and want to
> > send forward via myx1, using net_tq() like you do it know will pick
> > softnet1.
> > My suggestion was to keep softnet1 busy with processing packets coming
> > from myx1 and enqueue the task on softnet0.  This way no inter CPU
> > communication is needed.
> > But without testing we can't tell if it's more efficient the other way.
>
> scheduling the bundled start routine as a task on the current softnet would require each ifq to allocate a task for each softnet, and would probably result in a lot of thrashing if multiple threads are pushing packets onto a ring. one of them would have to remove the work from all of them.
>
> i think this is good enough for now. my tests with 2 softnets mixed in with this diff seemed ok. with 8 it sucked cos there was so much waiting on the netlock, so it's hard to say what the effect of this is.

Good to know.  Then let's move forward the way it is.
 

> >>>>          because ifq work could now be pending in a softnet taskq,
> >>>> ifq_barrier also needs to put a barrier in the taskq. this is
> >>>> implemented using taskq_barrier, which i wrote ages ago but didn't
> >>>> have a use case for at the time.
> >>
> >> both hrvoje and i hit a deadlock when downing an interface. if
> >> softnet (or softnets) are waiting on the netlock that and ioctl
> >> path holds, then an ifq_barrier in that ioctl path will sleep forever
> >> waiting for a task to run in softnets that are waiting for the
> >> netlock.
> >>
> >> to mitigate this ive added code like what uvm has. thoughts?
> >
> > That's only called in ioctl(2) path, right?  Which commands end up
> > there?
>
> ifq_barrier is generally called from the drv_stop/drv_down path by mpsafe drivers. if you go ifconfig down is the most obvious. other ioctl paths include those that generate ENETRESET, eg, configuring lladdr. some drivers also call their down routine during its own reconfiguration, or to fix a stuck ring.
>
> this diff also adds a call to ifq_barrier inside ifq_destroy, which is called when an interface is detached.

Then please add the following comment on top of the unlock:

        /* XXXSMP breaks atomicity */

It helps me keep track of the recursions that need to be fixed.  With
that ok mpi@

> > The problem with driver *_ioctl() routines is that pseudo-driver need
> > the NET_LOCK() while the others should not.  So I'd rather see the
> > NET_LOCK() released before calling ifp->if_ioctl().
> >
> >> Index: share/man/man9/task_add.9
> >> ===================================================================
> >> RCS file: /cvs/src/share/man/man9/task_add.9,v
> >> retrieving revision 1.16
> >> diff -u -p -r1.16 task_add.9
> >> --- share/man/man9/task_add.9 14 Sep 2015 15:14:55 -0000 1.16
> >> +++ share/man/man9/task_add.9 13 Nov 2017 00:46:17 -0000
> >> @@ -20,6 +20,7 @@
> >> .Sh NAME
> >> .Nm taskq_create ,
> >> .Nm taskq_destroy ,
> >> +.Nm taskq_barrier ,
> >> .Nm task_set ,
> >> .Nm task_add ,
> >> .Nm task_del ,
> >> @@ -37,6 +38,8 @@
> >> .Ft void
> >> .Fn taskq_destroy "struct taskq *tq"
> >> .Ft void
> >> +.Fn taskq_barrier "struct taskq *tq"
> >> +.Ft void
> >> .Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg"
> >> .Ft int
> >> .Fn task_add "struct taskq *tq" "struct task *t"
> >> @@ -88,6 +91,15 @@ Calling
> >> against the system taskq is an error and will lead to undefined
> >> behaviour or a system fault.
> >> .Pp
> >> +.Fn taskq_barrier
> >> +guarantees that any task that was running on the
> >> +.Fa tq
> >> +taskq when the barrier was called has finished by the time the barrier
> >> +returns.
> >> +.Fn taskq_barrier
> >> +is only supported on taskqs serviced by 1 thread,
> >> +and may not be called by a task running in the specified taskq.
> >> +.Pp
> >> It is the responsibility of the caller to provide the
> >> .Fn task_set ,
> >> .Fn task_add ,
> >> @@ -163,6 +175,8 @@ argument given in
> >> and
> >> .Fn taskq_destroy
> >> can be called during autoconf, or from process context.
> >> +.Fn taskq_barrier
> >> +can be called from process context.
> >> .Fn task_set ,
> >> .Fn task_add ,
> >> and
> >> Index: sys/sys/task.h
> >> ===================================================================
> >> RCS file: /cvs/src/sys/sys/task.h,v
> >> retrieving revision 1.11
> >> diff -u -p -r1.11 task.h
> >> --- sys/sys/task.h 7 Jun 2016 07:53:33 -0000 1.11
> >> +++ sys/sys/task.h 13 Nov 2017 00:46:17 -0000
> >> @@ -43,6 +43,7 @@ extern struct taskq *const systqmp;
> >>
> >> struct taskq *taskq_create(const char *, unsigned int, int, unsigned int);
> >> void taskq_destroy(struct taskq *);
> >> +void taskq_barrier(struct taskq *);
> >>
> >> void task_set(struct task *, void (*)(void *), void *);
> >> int task_add(struct taskq *, struct task *);
> >> Index: sys/kern/kern_task.c
> >> ===================================================================
> >> RCS file: /cvs/src/sys/kern/kern_task.c,v
> >> retrieving revision 1.20
> >> diff -u -p -r1.20 kern_task.c
> >> --- sys/kern/kern_task.c 30 Oct 2017 14:01:42 -0000 1.20
> >> +++ sys/kern/kern_task.c 13 Nov 2017 00:46:17 -0000
> >> @@ -22,6 +22,7 @@
> >> #include <sys/mutex.h>
> >> #include <sys/kthread.h>
> >> #include <sys/task.h>
> >> +#include <sys/proc.h>
> >>
> >> #define TASK_ONQUEUE 1
> >>
> >> @@ -68,6 +69,7 @@ struct taskq *const systqmp = &taskq_sys
> >>
> >> void taskq_init(void); /* called in init_main.c */
> >> void taskq_create_thread(void *);
> >> +void taskq_barrier_task(void *);
> >> int taskq_sleep(const volatile void *, struct mutex *, int,
> >>    const char *, int);
> >> int taskq_next_work(struct taskq *, struct task *, sleepfn);
> >> @@ -176,6 +178,30 @@ taskq_create_thread(void *arg)
> >> } while (tq->tq_running < tq->tq_nthreads);
> >>
> >> mtx_leave(&tq->tq_mtx);
> >> +}
> >> +
> >> +void
> >> +taskq_barrier(struct taskq *tq)
> >> +{
> >> + struct sleep_state sls;
> >> + unsigned int notdone = 1;
> >> + struct task t = TASK_INITIALIZER(taskq_barrier_task, &notdone);
> >> +
> >> + task_add(tq, &t);
> >> +
> >> + while (notdone) {
> >> + sleep_setup(&sls, &notdone, PWAIT, "tqbar");
> >> + sleep_finish(&sls, notdone);
> >> + }
> >> +}
> >> +
> >> +void
> >> +taskq_barrier_task(void *p)
> >> +{
> >> + unsigned int *notdone = p;
> >> +
> >> + *notdone = 0;
> >> + wakeup_one(notdone);
> >> }
> >>
> >> void
> >> Index: sys/net/ifq.c
> >> ===================================================================
> >> RCS file: /cvs/src/sys/net/ifq.c,v
> >> retrieving revision 1.12
> >> diff -u -p -r1.12 ifq.c
> >> --- sys/net/ifq.c 2 Jun 2017 00:07:12 -0000 1.12
> >> +++ sys/net/ifq.c 13 Nov 2017 00:46:17 -0000
> >> @@ -64,9 +64,16 @@ struct priq {
> >> void ifq_start_task(void *);
> >> void ifq_restart_task(void *);
> >> void ifq_barrier_task(void *);
> >> +void ifq_bundle_task(void *);
> >>
> >> #define TASK_ONQUEUE 0x1
> >>
> >> +static inline void
> >> +ifq_run_start(struct ifqueue *ifq)
> >> +{
> >> + ifq_serialize(ifq, &ifq->ifq_start);
> >> +}
> >> +
> >> void
> >> ifq_serialize(struct ifqueue *ifq, struct task *t)
> >> {
> >> @@ -108,6 +115,16 @@ ifq_is_serialized(struct ifqueue *ifq)
> >> }
> >>
> >> void
> >> +ifq_start(struct ifqueue *ifq)
> >> +{
> >> + if (ifq_len(ifq) >= 4) {
> >> + task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
> >> + ifq_run_start(ifq);
> >> +        } else
> >> +                task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
> >> +}
> >> +
> >> +void
> >> ifq_start_task(void *p)
> >> {
> >> struct ifqueue *ifq = p;
> >> @@ -131,6 +148,14 @@ ifq_restart_task(void *p)
> >> }
> >>
> >> void
> >> +ifq_bundle_task(void *p)
> >> +{
> >> + struct ifqueue *ifq = p;
> >> +
> >> + ifq_run_start(ifq);
> >> +}
> >> +
> >> +void
> >> ifq_barrier(struct ifqueue *ifq)
> >> {
> >> struct sleep_state sls;
> >> @@ -140,6 +165,18 @@ ifq_barrier(struct ifqueue *ifq)
> >> /* this should only be called from converted drivers */
> >> KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));
> >>
> >> + if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) {
> >> + int netlocked = (rw_status(&netlock) == RW_WRITE);
> >> +
> >> + if (netlocked)
> >> + NET_UNLOCK();
> >> +
> >> + taskq_barrier(ifq->ifq_softnet);
> >> +
> >> + if (netlocked)
> >> + NET_LOCK();
> >> + }
> >> +
> >> if (ifq->ifq_serializer == NULL)
> >> return;
> >>
> >> @@ -168,6 +205,7 @@ void
> >> ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
> >> {
> >> ifq->ifq_if = ifp;
> >> + ifq->ifq_softnet = net_tq(ifp->if_index);
> >> ifq->ifq_softc = NULL;
> >>
> >> mtx_init(&ifq->ifq_mtx, IPL_NET);
> >> @@ -189,6 +227,7 @@ ifq_init(struct ifqueue *ifq, struct ifn
> >> mtx_init(&ifq->ifq_task_mtx, IPL_NET);
> >> TAILQ_INIT(&ifq->ifq_task_list);
> >> ifq->ifq_serializer = NULL;
> >> + task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);
> >>
> >> task_set(&ifq->ifq_start, ifq_start_task, ifq);
> >> task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
> >> @@ -239,6 +278,8 @@ void
> >> ifq_destroy(struct ifqueue *ifq)
> >> {
> >> struct mbuf_list ml = MBUF_LIST_INITIALIZER();
> >> +
> >> + ifq_barrier(ifq); /* ensure nothing is running with the ifq */
> >>
> >> /* don't need to lock because this is the last use of the ifq */
> >>
> >> Index: sys/net/ifq.h
> >> ===================================================================
> >> RCS file: /cvs/src/sys/net/ifq.h,v
> >> retrieving revision 1.13
> >> diff -u -p -r1.13 ifq.h
> >> --- sys/net/ifq.h 3 May 2017 20:55:29 -0000 1.13
> >> +++ sys/net/ifq.h 13 Nov 2017 00:46:17 -0000
> >> @@ -25,6 +25,7 @@ struct ifq_ops;
> >>
> >> struct ifqueue {
> >> struct ifnet *ifq_if;
> >> + struct taskq *ifq_softnet;
> >> union {
> >> void *_ifq_softc;
> >> /*
> >> @@ -57,6 +58,7 @@ struct ifqueue {
> >> struct mutex ifq_task_mtx;
> >> struct task_list ifq_task_list;
> >> void *ifq_serializer;
> >> + struct task ifq_bundle;
> >>
> >> /* work to be serialised */
> >> struct task ifq_start;
> >> @@ -378,6 +380,7 @@ void ifq_init(struct ifqueue *, struct
> >> void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
> >> void ifq_destroy(struct ifqueue *);
> >> int ifq_enqueue(struct ifqueue *, struct mbuf *);
> >> +void ifq_start(struct ifqueue *);
> >> struct mbuf *ifq_deq_begin(struct ifqueue *);
> >> void ifq_deq_commit(struct ifqueue *, struct mbuf *);
> >> void ifq_deq_rollback(struct ifqueue *, struct mbuf *);
> >> @@ -411,12 +414,6 @@ static inline unsigned int
> >> ifq_is_oactive(struct ifqueue *ifq)
> >> {
> >> return (ifq->ifq_oactive);
> >> -}
> >> -
> >> -static inline void
> >> -ifq_start(struct ifqueue *ifq)
> >> -{
> >> - ifq_serialize(ifq, &ifq->ifq_start);
> >> }
> >>
> >> static inline void
> >>
>