drm(4), multi-threaded task queues and barriers

classic Classic list List threaded Threaded
12 messages Options
Reply | Threaded
Open this post in threaded view
|

drm(4), multi-threaded task queues and barriers

Mark Kettenis
The drm(4) codebase really needs multi-threaded task queues since the
code has tasks that wait for the completion of other tasks that are
submitted to the same task queue.  Thank you Linux...

Unfortunately the code also needs to wait for the completion of
submitted tasks from other threads.  This is implemented using
task_barrier(9).  But unfortunately our current implementation only
works for single-threaded task queues.

The diff below fixes this by marking the barrier task with a flag and
making sure that all threads of the task queue are synchronized.
This is achieved through a TASK_BARRIER flag that simply blocks the
threads until the last unblocked thread sees the flag and executes the
task.

The diff also starts 4 threads for each workqueue that gets created by
the drm(4) layer.  The number 4 is a bit arbitrary but it is the
number of threads that Linux creates per CPU for a so-called "unbound"
workqueue which hopefully is enough to always make progress.

Please test.  If you experience a "hang" with this diff, please try to
log in to the machine remotely over ssh and send me and jsg@ the
output of "ps -AHwlk".

Thanks,

Mark


Index: dev/pci/drm/drm_linux.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
retrieving revision 1.38
diff -u -p -r1.38 drm_linux.c
--- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
+++ dev/pci/drm/drm_linux.c 11 Jun 2019 18:54:38 -0000
@@ -1399,15 +1399,15 @@ drm_linux_init(void)
 {
  if (system_wq == NULL) {
  system_wq = (struct workqueue_struct *)
-    taskq_create("drmwq", 1, IPL_HIGH, 0);
+    taskq_create("drmwq", 4, IPL_HIGH, 0);
  }
  if (system_unbound_wq == NULL) {
  system_unbound_wq = (struct workqueue_struct *)
-    taskq_create("drmubwq", 1, IPL_HIGH, 0);
+    taskq_create("drmubwq", 4, IPL_HIGH, 0);
  }
  if (system_long_wq == NULL) {
  system_long_wq = (struct workqueue_struct *)
-    taskq_create("drmlwq", 1, IPL_HIGH, 0);
+    taskq_create("drmlwq", 4, IPL_HIGH, 0);
  }
 
  if (taskletq == NULL)
Index: dev/pci/drm/i915/intel_hotplug.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
retrieving revision 1.2
diff -u -p -r1.2 intel_hotplug.c
--- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
+++ dev/pci/drm/i915/intel_hotplug.c 11 Jun 2019 18:54:38 -0000
@@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
  INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
  INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
  INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
- dev_priv->hotplug.poll_init_work.tq = systq;
  INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
   intel_hpd_irq_storm_reenable_work);
 }
Index: kern/kern_task.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_task.c,v
retrieving revision 1.25
diff -u -p -r1.25 kern_task.c
--- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
+++ kern/kern_task.c 11 Jun 2019 18:54:39 -0000
@@ -43,6 +43,7 @@ struct taskq {
  TQ_S_DESTROYED
  } tq_state;
  unsigned int tq_running;
+ unsigned int tq_barrier;
  unsigned int tq_nthreads;
  unsigned int tq_flags;
  const char *tq_name;
@@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
 struct taskq taskq_sys = {
  TQ_S_CREATED,
  0,
+ 0,
  1,
  0,
  taskq_sys_name,
@@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
 struct taskq taskq_sys_mp = {
  TQ_S_CREATED,
  0,
+ 0,
  1,
  TASKQ_MPSAFE,
  taskq_sys_mp_name,
@@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
 
  tq->tq_state = TQ_S_CREATED;
  tq->tq_running = 0;
+ tq->tq_barrier = 0;
  tq->tq_nthreads = nthreads;
  tq->tq_name = name;
  tq->tq_flags = flags;
@@ -161,6 +165,7 @@ taskq_destroy(struct taskq *tq)
  panic("unexpected %s tq state %u", tq->tq_name, tq->tq_state);
  }
 
+ tq->tq_barrier = 0;
  while (tq->tq_running > 0) {
  wakeup(tq);
  msleep(&tq->tq_running, &tq->tq_mtx, PWAIT, "tqdestroy", 0);
@@ -223,6 +228,7 @@ taskq_barrier(struct taskq *tq)
 
  WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
 
+ SET(t.t_flags, TASK_BARRIER);
  task_add(tq, &t);
  cond_wait(&c, "tqbar");
 }
@@ -238,6 +244,7 @@ taskq_del_barrier(struct taskq *tq, stru
  if (task_del(tq, del))
  return;
 
+ SET(t.t_flags, TASK_BARRIER);
  task_add(tq, &t);
  cond_wait(&c, "tqbar");
 }
@@ -304,6 +311,7 @@ taskq_next_work(struct taskq *tq, struct
  struct task *next;
 
  mtx_enter(&tq->tq_mtx);
+retry:
  while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
  if (tq->tq_state != TQ_S_RUNNING) {
  mtx_leave(&tq->tq_mtx);
@@ -311,6 +319,17 @@ taskq_next_work(struct taskq *tq, struct
  }
 
  msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
+ }
+
+ if (ISSET(next->t_flags, TASK_BARRIER)) {
+ if (++tq->tq_barrier == tq->tq_nthreads) {
+ tq->tq_barrier = 0;
+ wakeup(tq);
+ } else {
+ while (tq->tq_barrier > 0)
+ msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
+ goto retry;
+ }
  }
 
  TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
Index: sys/task.h
===================================================================
RCS file: /cvs/src/sys/sys/task.h,v
retrieving revision 1.15
diff -u -p -r1.15 task.h
--- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
+++ sys/task.h 11 Jun 2019 18:54:39 -0000
@@ -31,6 +31,7 @@ struct task {
 };
 
 #define TASK_ONQUEUE 1
+#define TASK_BARRIER 2
 
 TAILQ_HEAD(task_list, task);
 

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Jonathan Gray-11
On Tue, Jun 11, 2019 at 09:10:46PM +0200, Mark Kettenis wrote:

> The drm(4) codebase really needs multi-threaded task queues since the
> code has taks that wait for the completion of other tasks that are
> submitted to the same task queue.  Thank you Linux...
>
> Unfortunately the code also needs to wait for the completion of
> submitted tasks from other threads.  This implemented using
> task_barrier(9).  But unfortunately our current implementation only
> works for single-threaded task queues.
>
> The diff below fixes this by marking the barrier task with a flag and
> making sure that all threads of the task queue are syncchronized.
> This achived through a TASK_BARRIER flag that simply blocks the
> threads until the last unblocked thread sees the flag and executes the
> task.
>
> The diff also starts 4 threads for each workqueue that gets created by
> the drm(4) layer.  The number 4 is a bit arbitrary but it is the
> number of threads that Linux creates per CPU for a so-called "unbound"
> workqueue which hopefully is enough to always make progress.

Ignoring taskletq and systq use in burner_task/switchtask across
drivers and the local only backlight_schedule_update_status(), was it
intentional to exclude:

workqueue.h: alloc_workqueue()
        struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
workqueue.h: alloc_ordered_workqueue()
        struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
workqueue.h:
        #define system_power_efficient_wq ((struct workqueue_struct *)systq)

>
> Please test.  If you experience a "hang" with this diff, please try to
> log in to the machine remotely over ssh and send me and jsg@ the
> output of "ps -AHwlk".
>
> Thanks,
>
> Mark
>
>
> Index: dev/pci/drm/drm_linux.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.38
> diff -u -p -r1.38 drm_linux.c
> --- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
> +++ dev/pci/drm/drm_linux.c 11 Jun 2019 18:54:38 -0000
> @@ -1399,15 +1399,15 @@ drm_linux_init(void)
>  {
>   if (system_wq == NULL) {
>   system_wq = (struct workqueue_struct *)
> -    taskq_create("drmwq", 1, IPL_HIGH, 0);
> +    taskq_create("drmwq", 4, IPL_HIGH, 0);
>   }
>   if (system_unbound_wq == NULL) {
>   system_unbound_wq = (struct workqueue_struct *)
> -    taskq_create("drmubwq", 1, IPL_HIGH, 0);
> +    taskq_create("drmubwq", 4, IPL_HIGH, 0);
>   }
>   if (system_long_wq == NULL) {
>   system_long_wq = (struct workqueue_struct *)
> -    taskq_create("drmlwq", 1, IPL_HIGH, 0);
> +    taskq_create("drmlwq", 4, IPL_HIGH, 0);
>   }
>  
>   if (taskletq == NULL)
> Index: dev/pci/drm/i915/intel_hotplug.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 intel_hotplug.c
> --- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
> +++ dev/pci/drm/i915/intel_hotplug.c 11 Jun 2019 18:54:38 -0000
> @@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
>   INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
>   INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
>   INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
> - dev_priv->hotplug.poll_init_work.tq = systq;
>   INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
>    intel_hpd_irq_storm_reenable_work);
>  }
> Index: kern/kern_task.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_task.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 kern_task.c
> --- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
> +++ kern/kern_task.c 11 Jun 2019 18:54:39 -0000
> @@ -43,6 +43,7 @@ struct taskq {
>   TQ_S_DESTROYED
>   } tq_state;
>   unsigned int tq_running;
> + unsigned int tq_barrier;
>   unsigned int tq_nthreads;
>   unsigned int tq_flags;
>   const char *tq_name;
> @@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
>  struct taskq taskq_sys = {
>   TQ_S_CREATED,
>   0,
> + 0,
>   1,
>   0,
>   taskq_sys_name,
> @@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
>  struct taskq taskq_sys_mp = {
>   TQ_S_CREATED,
>   0,
> + 0,
>   1,
>   TASKQ_MPSAFE,
>   taskq_sys_mp_name,
> @@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
>  
>   tq->tq_state = TQ_S_CREATED;
>   tq->tq_running = 0;
> + tq->tq_barrier = 0;
>   tq->tq_nthreads = nthreads;
>   tq->tq_name = name;
>   tq->tq_flags = flags;
> @@ -161,6 +165,7 @@ taskq_destroy(struct taskq *tq)
>   panic("unexpected %s tq state %u", tq->tq_name, tq->tq_state);
>   }
>  
> + tq->tq_barrier = 0;
>   while (tq->tq_running > 0) {
>   wakeup(tq);
>   msleep(&tq->tq_running, &tq->tq_mtx, PWAIT, "tqdestroy", 0);
> @@ -223,6 +228,7 @@ taskq_barrier(struct taskq *tq)
>  
>   WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
>  
> + SET(t.t_flags, TASK_BARRIER);
>   task_add(tq, &t);
>   cond_wait(&c, "tqbar");
>  }
> @@ -238,6 +244,7 @@ taskq_del_barrier(struct taskq *tq, stru
>   if (task_del(tq, del))
>   return;
>  
> + SET(t.t_flags, TASK_BARRIER);
>   task_add(tq, &t);
>   cond_wait(&c, "tqbar");
>  }
> @@ -304,6 +311,7 @@ taskq_next_work(struct taskq *tq, struct
>   struct task *next;
>  
>   mtx_enter(&tq->tq_mtx);
> +retry:
>   while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
>   if (tq->tq_state != TQ_S_RUNNING) {
>   mtx_leave(&tq->tq_mtx);
> @@ -311,6 +319,17 @@ taskq_next_work(struct taskq *tq, struct
>   }
>  
>   msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
> + }
> +
> + if (ISSET(next->t_flags, TASK_BARRIER)) {
> + if (++tq->tq_barrier == tq->tq_nthreads) {
> + tq->tq_barrier = 0;
> + wakeup(tq);
> + } else {
> + while (tq->tq_barrier > 0)
> + msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
> + goto retry;
> + }
>   }
>  
>   TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
> Index: sys/task.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/task.h,v
> retrieving revision 1.15
> diff -u -p -r1.15 task.h
> --- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
> +++ sys/task.h 11 Jun 2019 18:54:39 -0000
> @@ -31,6 +31,7 @@ struct task {
>  };
>  
>  #define TASK_ONQUEUE 1
> +#define TASK_BARRIER 2
>  
>  TAILQ_HEAD(task_list, task);
>  
>

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Mark Kettenis
> Date: Wed, 12 Jun 2019 17:04:10 +1000
> From: Jonathan Gray <[hidden email]>
>
> On Tue, Jun 11, 2019 at 09:10:46PM +0200, Mark Kettenis wrote:
> > The drm(4) codebase really needs multi-threaded task queues since the
> > code has taks that wait for the completion of other tasks that are
> > submitted to the same task queue.  Thank you Linux...
> >
> > Unfortunately the code also needs to wait for the completion of
> > submitted tasks from other threads.  This implemented using
> > task_barrier(9).  But unfortunately our current implementation only
> > works for single-threaded task queues.
> >
> > The diff below fixes this by marking the barrier task with a flag and
> > making sure that all threads of the task queue are syncchronized.
> > This achived through a TASK_BARRIER flag that simply blocks the
> > threads until the last unblocked thread sees the flag and executes the
> > task.
> >
> > The diff also starts 4 threads for each workqueue that gets created by
> > the drm(4) layer.  The number 4 is a bit arbitrary but it is the
> > number of threads that Linux creates per CPU for a so-called "unbound"
> > workqueue which hopefully is enough to always make progress.
>
> Ignoring taskletq and systq use in burner_task/switchtask across
> drivers and the local only backlight_schedule_update_status(), was it
> intentional to exclude:
>
> workqueue.h: alloc_workqueue()
> struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
> workqueue.h: alloc_ordered_workqueue()
> struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
> workqueue.h:
> #define system_power_efficient_wq ((struct workqueue_struct *)systq)

Not entirely.  But I don't want to spawn too many threads...  If this
diff seems to work, we might want to tweak the numbers a bit to see
what works best.  Whatever we do, the ordered workqueues need to stay
single-threaded.

> > Please test.  If you experience a "hang" with this diff, please try to
> > log in to the machine remotely over ssh and send me and jsg@ the
> > output of "ps -AHwlk".
> >
> > Thanks,
> >
> > Mark
> >
> >
> > Index: dev/pci/drm/drm_linux.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > retrieving revision 1.38
> > diff -u -p -r1.38 drm_linux.c
> > --- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
> > +++ dev/pci/drm/drm_linux.c 11 Jun 2019 18:54:38 -0000
> > @@ -1399,15 +1399,15 @@ drm_linux_init(void)
> >  {
> >   if (system_wq == NULL) {
> >   system_wq = (struct workqueue_struct *)
> > -    taskq_create("drmwq", 1, IPL_HIGH, 0);
> > +    taskq_create("drmwq", 4, IPL_HIGH, 0);
> >   }
> >   if (system_unbound_wq == NULL) {
> >   system_unbound_wq = (struct workqueue_struct *)
> > -    taskq_create("drmubwq", 1, IPL_HIGH, 0);
> > +    taskq_create("drmubwq", 4, IPL_HIGH, 0);
> >   }
> >   if (system_long_wq == NULL) {
> >   system_long_wq = (struct workqueue_struct *)
> > -    taskq_create("drmlwq", 1, IPL_HIGH, 0);
> > +    taskq_create("drmlwq", 4, IPL_HIGH, 0);
> >   }
> >  
> >   if (taskletq == NULL)
> > Index: dev/pci/drm/i915/intel_hotplug.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
> > retrieving revision 1.2
> > diff -u -p -r1.2 intel_hotplug.c
> > --- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
> > +++ dev/pci/drm/i915/intel_hotplug.c 11 Jun 2019 18:54:38 -0000
> > @@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
> >   INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
> >   INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
> >   INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
> > - dev_priv->hotplug.poll_init_work.tq = systq;
> >   INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
> >    intel_hpd_irq_storm_reenable_work);
> >  }
> > Index: kern/kern_task.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/kern_task.c,v
> > retrieving revision 1.25
> > diff -u -p -r1.25 kern_task.c
> > --- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
> > +++ kern/kern_task.c 11 Jun 2019 18:54:39 -0000
> > @@ -43,6 +43,7 @@ struct taskq {
> >   TQ_S_DESTROYED
> >   } tq_state;
> >   unsigned int tq_running;
> > + unsigned int tq_barrier;
> >   unsigned int tq_nthreads;
> >   unsigned int tq_flags;
> >   const char *tq_name;
> > @@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
> >  struct taskq taskq_sys = {
> >   TQ_S_CREATED,
> >   0,
> > + 0,
> >   1,
> >   0,
> >   taskq_sys_name,
> > @@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
> >  struct taskq taskq_sys_mp = {
> >   TQ_S_CREATED,
> >   0,
> > + 0,
> >   1,
> >   TASKQ_MPSAFE,
> >   taskq_sys_mp_name,
> > @@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
> >  
> >   tq->tq_state = TQ_S_CREATED;
> >   tq->tq_running = 0;
> > + tq->tq_barrier = 0;
> >   tq->tq_nthreads = nthreads;
> >   tq->tq_name = name;
> >   tq->tq_flags = flags;
> > @@ -161,6 +165,7 @@ taskq_destroy(struct taskq *tq)
> >   panic("unexpected %s tq state %u", tq->tq_name, tq->tq_state);
> >   }
> >  
> > + tq->tq_barrier = 0;
> >   while (tq->tq_running > 0) {
> >   wakeup(tq);
> >   msleep(&tq->tq_running, &tq->tq_mtx, PWAIT, "tqdestroy", 0);
> > @@ -223,6 +228,7 @@ taskq_barrier(struct taskq *tq)
> >  
> >   WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
> >  
> > + SET(t.t_flags, TASK_BARRIER);
> >   task_add(tq, &t);
> >   cond_wait(&c, "tqbar");
> >  }
> > @@ -238,6 +244,7 @@ taskq_del_barrier(struct taskq *tq, stru
> >   if (task_del(tq, del))
> >   return;
> >  
> > + SET(t.t_flags, TASK_BARRIER);
> >   task_add(tq, &t);
> >   cond_wait(&c, "tqbar");
> >  }
> > @@ -304,6 +311,7 @@ taskq_next_work(struct taskq *tq, struct
> >   struct task *next;
> >  
> >   mtx_enter(&tq->tq_mtx);
> > +retry:
> >   while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
> >   if (tq->tq_state != TQ_S_RUNNING) {
> >   mtx_leave(&tq->tq_mtx);
> > @@ -311,6 +319,17 @@ taskq_next_work(struct taskq *tq, struct
> >   }
> >  
> >   msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
> > + }
> > +
> > + if (ISSET(next->t_flags, TASK_BARRIER)) {
> > + if (++tq->tq_barrier == tq->tq_nthreads) {
> > + tq->tq_barrier = 0;
> > + wakeup(tq);
> > + } else {
> > + while (tq->tq_barrier > 0)
> > + msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
> > + goto retry;
> > + }
> >   }
> >  
> >   TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
> > Index: sys/task.h
> > ===================================================================
> > RCS file: /cvs/src/sys/sys/task.h,v
> > retrieving revision 1.15
> > diff -u -p -r1.15 task.h
> > --- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
> > +++ sys/task.h 11 Jun 2019 18:54:39 -0000
> > @@ -31,6 +31,7 @@ struct task {
> >  };
> >  
> >  #define TASK_ONQUEUE 1
> > +#define TASK_BARRIER 2
> >  
> >  TAILQ_HEAD(task_list, task);
> >  
> >
>

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Sven M. Hallberg
In reply to this post by Mark Kettenis
Mark Kettenis on Tue, Jun 11 2019:
> The drm(4) codebase really needs multi-threaded task queues [...]
>
> The diff also starts 4 threads for each workqueue that gets created by
> the drm(4) layer.  The number 4 is a bit arbitrary but it is the
> number of threads that Linux creates per CPU for a so-called "unbound"
> workqueue which hopefully is enough to always make progress.
>
> Please test.

Looks good and appears to work fine with two displays (one internal, one
external). Will test with three at work tomorrow.


> - dev_priv->hotplug.poll_init_work.tq = systq;

Intentional?


-p


> Index: dev/pci/drm/drm_linux.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.38
> diff -u -p -r1.38 drm_linux.c
> --- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
> +++ dev/pci/drm/drm_linux.c 11 Jun 2019 18:54:38 -0000
> @@ -1399,15 +1399,15 @@ drm_linux_init(void)
>  {
>   if (system_wq == NULL) {
>   system_wq = (struct workqueue_struct *)
> -    taskq_create("drmwq", 1, IPL_HIGH, 0);
> +    taskq_create("drmwq", 4, IPL_HIGH, 0);
>   }
>   if (system_unbound_wq == NULL) {
>   system_unbound_wq = (struct workqueue_struct *)
> -    taskq_create("drmubwq", 1, IPL_HIGH, 0);
> +    taskq_create("drmubwq", 4, IPL_HIGH, 0);
>   }
>   if (system_long_wq == NULL) {
>   system_long_wq = (struct workqueue_struct *)
> -    taskq_create("drmlwq", 1, IPL_HIGH, 0);
> +    taskq_create("drmlwq", 4, IPL_HIGH, 0);
>   }
>  
>   if (taskletq == NULL)
> Index: dev/pci/drm/i915/intel_hotplug.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 intel_hotplug.c
> --- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
> +++ dev/pci/drm/i915/intel_hotplug.c 11 Jun 2019 18:54:38 -0000
> @@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
>   INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
>   INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
>   INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
> - dev_priv->hotplug.poll_init_work.tq = systq;
>   INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
>    intel_hpd_irq_storm_reenable_work);
>  }
> Index: kern/kern_task.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_task.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 kern_task.c
> --- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
> +++ kern/kern_task.c 11 Jun 2019 18:54:39 -0000
> @@ -43,6 +43,7 @@ struct taskq {
>   TQ_S_DESTROYED
>   } tq_state;
>   unsigned int tq_running;
> + unsigned int tq_barrier;
>   unsigned int tq_nthreads;
>   unsigned int tq_flags;
>   const char *tq_name;
> @@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
>  struct taskq taskq_sys = {
>   TQ_S_CREATED,
>   0,
> + 0,
>   1,
>   0,
>   taskq_sys_name,
> @@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
>  struct taskq taskq_sys_mp = {
>   TQ_S_CREATED,
>   0,
> + 0,
>   1,
>   TASKQ_MPSAFE,
>   taskq_sys_mp_name,
> @@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
>  
>   tq->tq_state = TQ_S_CREATED;
>   tq->tq_running = 0;
> + tq->tq_barrier = 0;
>   tq->tq_nthreads = nthreads;
>   tq->tq_name = name;
>   tq->tq_flags = flags;
> @@ -161,6 +165,7 @@ taskq_destroy(struct taskq *tq)
>   panic("unexpected %s tq state %u", tq->tq_name, tq->tq_state);
>   }
>  
> + tq->tq_barrier = 0;
>   while (tq->tq_running > 0) {
>   wakeup(tq);
>   msleep(&tq->tq_running, &tq->tq_mtx, PWAIT, "tqdestroy", 0);
> @@ -223,6 +228,7 @@ taskq_barrier(struct taskq *tq)
>  
>   WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
>  
> + SET(t.t_flags, TASK_BARRIER);
>   task_add(tq, &t);
>   cond_wait(&c, "tqbar");
>  }
> @@ -238,6 +244,7 @@ taskq_del_barrier(struct taskq *tq, stru
>   if (task_del(tq, del))
>   return;
>  
> + SET(t.t_flags, TASK_BARRIER);
>   task_add(tq, &t);
>   cond_wait(&c, "tqbar");
>  }
> @@ -304,6 +311,7 @@ taskq_next_work(struct taskq *tq, struct
>   struct task *next;
>  
>   mtx_enter(&tq->tq_mtx);
> +retry:
>   while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
>   if (tq->tq_state != TQ_S_RUNNING) {
>   mtx_leave(&tq->tq_mtx);
> @@ -311,6 +319,17 @@ taskq_next_work(struct taskq *tq, struct
>   }
>  
>   msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
> + }
> +
> + if (ISSET(next->t_flags, TASK_BARRIER)) {
> + if (++tq->tq_barrier == tq->tq_nthreads) {
> + tq->tq_barrier = 0;
> + wakeup(tq);
> + } else {
> + while (tq->tq_barrier > 0)
> + msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
> + goto retry;
> + }
>   }
>  
>   TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
> Index: sys/task.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/task.h,v
> retrieving revision 1.15
> diff -u -p -r1.15 task.h
> --- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
> +++ sys/task.h 11 Jun 2019 18:54:39 -0000
> @@ -31,6 +31,7 @@ struct task {
>  };
>  
>  #define TASK_ONQUEUE 1
> +#define TASK_BARRIER 2
>  
>  TAILQ_HEAD(task_list, task);
>  

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Mark Kettenis
> From: "Sven M. Hallberg" <[hidden email]>
> Date: Wed, 12 Jun 2019 23:18:24 +0200
>
> Mark Kettenis on Tue, Jun 11 2019:
> > The drm(4) codebase really needs multi-threaded task queues [...]
> >
> > The diff also starts 4 threads for each workqueue that gets created by
> > the drm(4) layer.  The number 4 is a bit arbitrary but it is the
> > number of threads that Linux creates per CPU for a so-called "unbound"
> > workqueue which hopefully is enough to always make progress.
> >
> > Please test.
>
> Looks good and appears to work fine with two displays (one internal, one
> external). Will test with three at work tomorrow.
>
>
> > - dev_priv->hotplug.poll_init_work.tq = systq;
>
> Intentional?

Yes.  It removes a local modification that should no longer be necessary.

Unfortunately the diff doesn't work with amdgpu.  Some more thinking
needed...

> > Index: dev/pci/drm/drm_linux.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > retrieving revision 1.38
> > diff -u -p -r1.38 drm_linux.c
> > --- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
> > +++ dev/pci/drm/drm_linux.c 11 Jun 2019 18:54:38 -0000
> > @@ -1399,15 +1399,15 @@ drm_linux_init(void)
> >  {
> >   if (system_wq == NULL) {
> >   system_wq = (struct workqueue_struct *)
> > -    taskq_create("drmwq", 1, IPL_HIGH, 0);
> > +    taskq_create("drmwq", 4, IPL_HIGH, 0);
> >   }
> >   if (system_unbound_wq == NULL) {
> >   system_unbound_wq = (struct workqueue_struct *)
> > -    taskq_create("drmubwq", 1, IPL_HIGH, 0);
> > +    taskq_create("drmubwq", 4, IPL_HIGH, 0);
> >   }
> >   if (system_long_wq == NULL) {
> >   system_long_wq = (struct workqueue_struct *)
> > -    taskq_create("drmlwq", 1, IPL_HIGH, 0);
> > +    taskq_create("drmlwq", 4, IPL_HIGH, 0);
> >   }
> >  
> >   if (taskletq == NULL)
> > Index: dev/pci/drm/i915/intel_hotplug.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
> > retrieving revision 1.2
> > diff -u -p -r1.2 intel_hotplug.c
> > --- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
> > +++ dev/pci/drm/i915/intel_hotplug.c 11 Jun 2019 18:54:38 -0000
> > @@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
> >   INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
> >   INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
> >   INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
> > - dev_priv->hotplug.poll_init_work.tq = systq;
> >   INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
> >    intel_hpd_irq_storm_reenable_work);
> >  }
> > Index: kern/kern_task.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/kern_task.c,v
> > retrieving revision 1.25
> > diff -u -p -r1.25 kern_task.c
> > --- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
> > +++ kern/kern_task.c 11 Jun 2019 18:54:39 -0000
> > @@ -43,6 +43,7 @@ struct taskq {
> >   TQ_S_DESTROYED
> >   } tq_state;
> >   unsigned int tq_running;
> > + unsigned int tq_barrier;
> >   unsigned int tq_nthreads;
> >   unsigned int tq_flags;
> >   const char *tq_name;
> > @@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
> >  struct taskq taskq_sys = {
> >   TQ_S_CREATED,
> >   0,
> > + 0,
> >   1,
> >   0,
> >   taskq_sys_name,
> > @@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
> >  struct taskq taskq_sys_mp = {
> >   TQ_S_CREATED,
> >   0,
> > + 0,
> >   1,
> >   TASKQ_MPSAFE,
> >   taskq_sys_mp_name,
> > @@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
> >  
> >   tq->tq_state = TQ_S_CREATED;
> >   tq->tq_running = 0;
> > + tq->tq_barrier = 0;
> >   tq->tq_nthreads = nthreads;
> >   tq->tq_name = name;
> >   tq->tq_flags = flags;
> > @@ -161,6 +165,7 @@ taskq_destroy(struct taskq *tq)
> >   panic("unexpected %s tq state %u", tq->tq_name, tq->tq_state);
> >   }
> >  
> > + tq->tq_barrier = 0;
> >   while (tq->tq_running > 0) {
> >   wakeup(tq);
> >   msleep(&tq->tq_running, &tq->tq_mtx, PWAIT, "tqdestroy", 0);
> > @@ -223,6 +228,7 @@ taskq_barrier(struct taskq *tq)
> >  
> >   WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
> >  
> > + SET(t.t_flags, TASK_BARRIER);
> >   task_add(tq, &t);
> >   cond_wait(&c, "tqbar");
> >  }
> > @@ -238,6 +244,7 @@ taskq_del_barrier(struct taskq *tq, stru
> >   if (task_del(tq, del))
> >   return;
> >  
> > + SET(t.t_flags, TASK_BARRIER);
> >   task_add(tq, &t);
> >   cond_wait(&c, "tqbar");
> >  }
> > @@ -304,6 +311,7 @@ taskq_next_work(struct taskq *tq, struct
> >   struct task *next;
> >  
> >   mtx_enter(&tq->tq_mtx);
> > +retry:
> >   while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
> >   if (tq->tq_state != TQ_S_RUNNING) {
> >   mtx_leave(&tq->tq_mtx);
> > @@ -311,6 +319,17 @@ taskq_next_work(struct taskq *tq, struct
> >   }
> >  
> >   msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
> > + }
> > +
> > + if (ISSET(next->t_flags, TASK_BARRIER)) {
> > + if (++tq->tq_barrier == tq->tq_nthreads) {
> > + tq->tq_barrier = 0;
> > + wakeup(tq);
> > + } else {
> > + while (tq->tq_barrier > 0)
> > + msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
> > + goto retry;
> > + }
> >   }
> >  
> >   TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
> > Index: sys/task.h
> > ===================================================================
> > RCS file: /cvs/src/sys/sys/task.h,v
> > retrieving revision 1.15
> > diff -u -p -r1.15 task.h
> > --- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
> > +++ sys/task.h 11 Jun 2019 18:54:39 -0000
> > @@ -31,6 +31,7 @@ struct task {
> >  };
> >  
> >  #define TASK_ONQUEUE 1
> > +#define TASK_BARRIER 2
> >  
> >  TAILQ_HEAD(task_list, task);
> >  
>

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Sven M. Hallberg
Mark Kettenis on Wed, Jun 12 2019:
>> Looks good and appears to work fine with two displays (one internal, one
>> external). Will test with three at work tomorrow.

Your diff also works for me with three displays (inteldrm).

> Unfortunately the diff doesn't work with amdgpu.  Some more thinking
> needed...

I would be interested to hear what the issue is.

Regards,
pesco

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Mark Kettenis
In reply to this post by Mark Kettenis
> Date: Wed, 12 Jun 2019 23:57:27 +0200 (CEST)
> From: Mark Kettenis <[hidden email]>
>
> > From: "Sven M. Hallberg" <[hidden email]>
> > Date: Wed, 12 Jun 2019 23:18:24 +0200
> >
> > Mark Kettenis on Tue, Jun 11 2019:
> > > The drm(4) codebase really needs multi-threaded task queues [...]
> > >
> > > The diff also starts 4 threads for each workqueue that gets created by
> > > the drm(4) layer.  The number 4 is a bit arbitrary but it is the
> > > number of threads that Linux creates per CPU for a so-called "unbound"
> > > workqueue which hopefully is enough to always make progress.
> > >
> > > Please test.
> >
> > Looks good and appears to work fine with two displays (one internal, one
> > external). Will test with three at work tomorrow.
> >
> >
> > > - dev_priv->hotplug.poll_init_work.tq = systq;
> >
> > Intentional?
>
> Yes.  It removes a local modification that should no longer be necessary.
>
> Unfortunately the diff doesn't work with amdgpu.  Some more thinking
> needed...

So here is a diff that fixes the problem as far as I can tell.
Jonathan, Sven, can you give this a go?

Index: dev/pci/drm/drm_linux.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
retrieving revision 1.38
diff -u -p -r1.38 drm_linux.c
--- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
+++ dev/pci/drm/drm_linux.c 13 Jun 2019 20:53:23 -0000
@@ -1399,15 +1399,15 @@ drm_linux_init(void)
 {
  if (system_wq == NULL) {
  system_wq = (struct workqueue_struct *)
-    taskq_create("drmwq", 1, IPL_HIGH, 0);
+    taskq_create("drmwq", 4, IPL_HIGH, 0);
  }
  if (system_unbound_wq == NULL) {
  system_unbound_wq = (struct workqueue_struct *)
-    taskq_create("drmubwq", 1, IPL_HIGH, 0);
+    taskq_create("drmubwq", 4, IPL_HIGH, 0);
  }
  if (system_long_wq == NULL) {
  system_long_wq = (struct workqueue_struct *)
-    taskq_create("drmlwq", 1, IPL_HIGH, 0);
+    taskq_create("drmlwq", 4, IPL_HIGH, 0);
  }
 
  if (taskletq == NULL)
Index: dev/pci/drm/i915/intel_hotplug.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
retrieving revision 1.2
diff -u -p -r1.2 intel_hotplug.c
--- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
+++ dev/pci/drm/i915/intel_hotplug.c 13 Jun 2019 20:53:23 -0000
@@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
  INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
  INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
  INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
- dev_priv->hotplug.poll_init_work.tq = systq;
  INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
   intel_hpd_irq_storm_reenable_work);
 }
Index: kern/kern_task.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_task.c,v
retrieving revision 1.25
diff -u -p -r1.25 kern_task.c
--- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
+++ kern/kern_task.c 13 Jun 2019 20:53:23 -0000
@@ -43,6 +43,7 @@ struct taskq {
  TQ_S_DESTROYED
  } tq_state;
  unsigned int tq_running;
+ unsigned int tq_waiting;
  unsigned int tq_nthreads;
  unsigned int tq_flags;
  const char *tq_name;
@@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
 struct taskq taskq_sys = {
  TQ_S_CREATED,
  0,
+ 0,
  1,
  0,
  taskq_sys_name,
@@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
 struct taskq taskq_sys_mp = {
  TQ_S_CREATED,
  0,
+ 0,
  1,
  TASKQ_MPSAFE,
  taskq_sys_mp_name,
@@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
 
  tq->tq_state = TQ_S_CREATED;
  tq->tq_running = 0;
+ tq->tq_waiting = 0;
  tq->tq_nthreads = nthreads;
  tq->tq_name = name;
  tq->tq_flags = flags;
@@ -223,6 +227,7 @@ taskq_barrier(struct taskq *tq)
 
  WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
 
+ SET(t.t_flags, TASK_BARRIER);
  task_add(tq, &t);
  cond_wait(&c, "tqbar");
 }
@@ -238,6 +243,7 @@ taskq_del_barrier(struct taskq *tq, stru
  if (task_del(tq, del))
  return;
 
+ SET(t.t_flags, TASK_BARRIER);
  task_add(tq, &t);
  cond_wait(&c, "tqbar");
 }
@@ -304,13 +310,26 @@ taskq_next_work(struct taskq *tq, struct
  struct task *next;
 
  mtx_enter(&tq->tq_mtx);
+retry:
  while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
  if (tq->tq_state != TQ_S_RUNNING) {
  mtx_leave(&tq->tq_mtx);
  return (0);
  }
 
+ tq->tq_waiting++;
  msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
+ tq->tq_waiting--;
+ }
+
+ if (ISSET(next->t_flags, TASK_BARRIER)) {
+ if (++tq->tq_waiting == tq->tq_nthreads) {
+ tq->tq_waiting--;
+ } else {
+ msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
+ tq->tq_waiting--;
+ goto retry;
+ }
  }
 
  TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
Index: sys/task.h
===================================================================
RCS file: /cvs/src/sys/sys/task.h,v
retrieving revision 1.15
diff -u -p -r1.15 task.h
--- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
+++ sys/task.h 13 Jun 2019 20:53:23 -0000
@@ -31,6 +31,7 @@ struct task {
 };
 
 #define TASK_ONQUEUE 1
+#define TASK_BARRIER 2
 
 TAILQ_HEAD(task_list, task);
 

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Jonathan Gray-11
On Thu, Jun 13, 2019 at 11:27:57PM +0200, Mark Kettenis wrote:

> > Date: Wed, 12 Jun 2019 23:57:27 +0200 (CEST)
> > From: Mark Kettenis <[hidden email]>
> >
> > > From: "Sven M. Hallberg" <[hidden email]>
> > > Date: Wed, 12 Jun 2019 23:18:24 +0200
> > >
> > > Mark Kettenis on Tue, Jun 11 2019:
> > > > The drm(4) codebase really needs multi-threaded task queues [...]
> > > >
> > > > The diff also starts 4 threads for each workqueue that gets created by
> > > > the drm(4) layer.  The number 4 is a bit arbitrary but it is the
> > > > number of threads that Linux creates per CPU for a so-called "unbound"
> > > > workqueue which hopefully is enough to always make progress.
> > > >
> > > > Please test.
> > >
> > > Looks good and appears to work fine with two displays (one internal, one
> > > external). Will test with three at work tomorrow.
> > >
> > >
> > > > - dev_priv->hotplug.poll_init_work.tq = systq;
> > >
> > > Intentional?
> >
> > Yes.  It removes a local modification that should no longer be necessary.
> >
> > Unfortunately the diff doesn't work with amdgpu.  Some more thinking
> > needed...
>
> So here is a diff that fixes the problem as far as I can tell.
> Jonathan, Sven, can you give this a go?

This revised diff does not introduce any new problems with amdgpu as
far as I can tell.  Xorg starts/runs, piglit starts and later gets stuck
in dmafence/scheduler related wait channels (as it did before).

>
> Index: dev/pci/drm/drm_linux.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.38
> diff -u -p -r1.38 drm_linux.c
> --- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
> +++ dev/pci/drm/drm_linux.c 13 Jun 2019 20:53:23 -0000
> @@ -1399,15 +1399,15 @@ drm_linux_init(void)
>  {
>   if (system_wq == NULL) {
>   system_wq = (struct workqueue_struct *)
> -    taskq_create("drmwq", 1, IPL_HIGH, 0);
> +    taskq_create("drmwq", 4, IPL_HIGH, 0);
>   }
>   if (system_unbound_wq == NULL) {
>   system_unbound_wq = (struct workqueue_struct *)
> -    taskq_create("drmubwq", 1, IPL_HIGH, 0);
> +    taskq_create("drmubwq", 4, IPL_HIGH, 0);
>   }
>   if (system_long_wq == NULL) {
>   system_long_wq = (struct workqueue_struct *)
> -    taskq_create("drmlwq", 1, IPL_HIGH, 0);
> +    taskq_create("drmlwq", 4, IPL_HIGH, 0);
>   }
>  
>   if (taskletq == NULL)
> Index: dev/pci/drm/i915/intel_hotplug.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 intel_hotplug.c
> --- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
> +++ dev/pci/drm/i915/intel_hotplug.c 13 Jun 2019 20:53:23 -0000
> @@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
>   INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
>   INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
>   INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
> - dev_priv->hotplug.poll_init_work.tq = systq;
>   INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
>    intel_hpd_irq_storm_reenable_work);
>  }
> Index: kern/kern_task.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_task.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 kern_task.c
> --- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
> +++ kern/kern_task.c 13 Jun 2019 20:53:23 -0000
> @@ -43,6 +43,7 @@ struct taskq {
>   TQ_S_DESTROYED
>   } tq_state;
>   unsigned int tq_running;
> + unsigned int tq_waiting;
>   unsigned int tq_nthreads;
>   unsigned int tq_flags;
>   const char *tq_name;
> @@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
>  struct taskq taskq_sys = {
>   TQ_S_CREATED,
>   0,
> + 0,
>   1,
>   0,
>   taskq_sys_name,
> @@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
>  struct taskq taskq_sys_mp = {
>   TQ_S_CREATED,
>   0,
> + 0,
>   1,
>   TASKQ_MPSAFE,
>   taskq_sys_mp_name,
> @@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
>  
>   tq->tq_state = TQ_S_CREATED;
>   tq->tq_running = 0;
> + tq->tq_waiting = 0;
>   tq->tq_nthreads = nthreads;
>   tq->tq_name = name;
>   tq->tq_flags = flags;
> @@ -223,6 +227,7 @@ taskq_barrier(struct taskq *tq)
>  
>   WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
>  
> + SET(t.t_flags, TASK_BARRIER);
>   task_add(tq, &t);
>   cond_wait(&c, "tqbar");
>  }
> @@ -238,6 +243,7 @@ taskq_del_barrier(struct taskq *tq, stru
>   if (task_del(tq, del))
>   return;
>  
> + SET(t.t_flags, TASK_BARRIER);
>   task_add(tq, &t);
>   cond_wait(&c, "tqbar");
>  }
> @@ -304,13 +310,26 @@ taskq_next_work(struct taskq *tq, struct
>   struct task *next;
>  
>   mtx_enter(&tq->tq_mtx);
> +retry:
>   while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
>   if (tq->tq_state != TQ_S_RUNNING) {
>   mtx_leave(&tq->tq_mtx);
>   return (0);
>   }
>  
> + tq->tq_waiting++;
>   msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
> + tq->tq_waiting--;
> + }
> +
> + if (ISSET(next->t_flags, TASK_BARRIER)) {
> + if (++tq->tq_waiting == tq->tq_nthreads) {
> + tq->tq_waiting--;
> + } else {
> + msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
> + tq->tq_waiting--;
> + goto retry;
> + }
>   }
>  
>   TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
> Index: sys/task.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/task.h,v
> retrieving revision 1.15
> diff -u -p -r1.15 task.h
> --- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
> +++ sys/task.h 13 Jun 2019 20:53:23 -0000
> @@ -31,6 +31,7 @@ struct task {
>  };
>  
>  #define TASK_ONQUEUE 1
> +#define TASK_BARRIER 2
>  
>  TAILQ_HEAD(task_list, task);
>  
>

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Sven M. Hallberg
In reply to this post by Mark Kettenis
Mark Kettenis on Thu, Jun 13 2019:
> So here is a diff that fixes the problem as far as I can tell.
> Jonathan, Sven, can you give this a go?

Works for me.


> + tq->tq_waiting++;
>   msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
> + tq->tq_waiting--;

Heh, didn't spot this either. State machines are hard. :)


Regards.

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Jonathan Gray-11
In reply to this post by Mark Kettenis
On Wed, Jun 12, 2019 at 09:15:36AM +0200, Mark Kettenis wrote:

> > Date: Wed, 12 Jun 2019 17:04:10 +1000
> > From: Jonathan Gray <[hidden email]>
> >
> > On Tue, Jun 11, 2019 at 09:10:46PM +0200, Mark Kettenis wrote:
> > > The drm(4) codebase really needs multi-threaded task queues since the
> > > code has tasks that wait for the completion of other tasks that are
> > > submitted to the same task queue.  Thank you Linux...
> > >
> > > Unfortunately the code also needs to wait for the completion of
> > > submitted tasks from other threads.  This is implemented using
> > > task_barrier(9).  But unfortunately our current implementation only
> > > works for single-threaded task queues.
> > >
> > > The diff below fixes this by marking the barrier task with a flag and
> > > making sure that all threads of the task queue are synchronized.
> > > This is achieved through a TASK_BARRIER flag that simply blocks the
> > > threads until the last unblocked thread sees the flag and executes the
> > > task.
> > >
> > > The diff also starts 4 threads for each workqueue that gets created by
> > > the drm(4) layer.  The number 4 is a bit arbitrary but it is the
> > > number of threads that Linux creates per CPU for a so-called "unbound"
> > > workqueue which hopefully is enough to always make progress.
> >
> > Ignoring taskletq and systq use in burner_task/switchtask across
> > drivers and the local only backlight_schedule_update_status(), was it
> > intentional to exclude:
> >
> > workqueue.h: alloc_workqueue()
> > struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
> > workqueue.h: alloc_ordered_workqueue()
> > struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
> > workqueue.h:
> > #define system_power_efficient_wq ((struct workqueue_struct *)systq)
>
> Not entirely.  But I don't want to spawn too many threads...  If this
> diff seems to work, we might want to tweak the numbers a bit to see
> what works best.  Whatever we do, the ordered workqueues need to stay
> single-threaded.

While the non-drm parts of this went in the drm_linux.c and intel_hotplug.c
parts have not.  Was there a particular reason these changes didn't go in?

>
> > > Please test.  If you experience a "hang" with this diff, please try to
> > > log in to the machine remotely over ssh and send me and jsg@ the
> > > output of "ps -AHwlk".
> > >
> > > Thanks,
> > >
> > > Mark
> > >
> > >
> > > Index: dev/pci/drm/drm_linux.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > > retrieving revision 1.38
> > > diff -u -p -r1.38 drm_linux.c
> > > --- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
> > > +++ dev/pci/drm/drm_linux.c 11 Jun 2019 18:54:38 -0000
> > > @@ -1399,15 +1399,15 @@ drm_linux_init(void)
> > >  {
> > >   if (system_wq == NULL) {
> > >   system_wq = (struct workqueue_struct *)
> > > -    taskq_create("drmwq", 1, IPL_HIGH, 0);
> > > +    taskq_create("drmwq", 4, IPL_HIGH, 0);
> > >   }
> > >   if (system_unbound_wq == NULL) {
> > >   system_unbound_wq = (struct workqueue_struct *)
> > > -    taskq_create("drmubwq", 1, IPL_HIGH, 0);
> > > +    taskq_create("drmubwq", 4, IPL_HIGH, 0);
> > >   }
> > >   if (system_long_wq == NULL) {
> > >   system_long_wq = (struct workqueue_struct *)
> > > -    taskq_create("drmlwq", 1, IPL_HIGH, 0);
> > > +    taskq_create("drmlwq", 4, IPL_HIGH, 0);
> > >   }
> > >  
> > >   if (taskletq == NULL)
> > > Index: dev/pci/drm/i915/intel_hotplug.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
> > > retrieving revision 1.2
> > > diff -u -p -r1.2 intel_hotplug.c
> > > --- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
> > > +++ dev/pci/drm/i915/intel_hotplug.c 11 Jun 2019 18:54:38 -0000
> > > @@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
> > >   INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
> > >   INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
> > >   INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
> > > - dev_priv->hotplug.poll_init_work.tq = systq;
> > >   INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
> > >    intel_hpd_irq_storm_reenable_work);
> > >  }
> > > Index: kern/kern_task.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/kern/kern_task.c,v
> > > retrieving revision 1.25
> > > diff -u -p -r1.25 kern_task.c
> > > --- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
> > > +++ kern/kern_task.c 11 Jun 2019 18:54:39 -0000
> > > @@ -43,6 +43,7 @@ struct taskq {
> > >   TQ_S_DESTROYED
> > >   } tq_state;
> > >   unsigned int tq_running;
> > > + unsigned int tq_barrier;
> > >   unsigned int tq_nthreads;
> > >   unsigned int tq_flags;
> > >   const char *tq_name;
> > > @@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
> > >  struct taskq taskq_sys = {
> > >   TQ_S_CREATED,
> > >   0,
> > > + 0,
> > >   1,
> > >   0,
> > >   taskq_sys_name,
> > > @@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
> > >  struct taskq taskq_sys_mp = {
> > >   TQ_S_CREATED,
> > >   0,
> > > + 0,
> > >   1,
> > >   TASKQ_MPSAFE,
> > >   taskq_sys_mp_name,
> > > @@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
> > >  
> > >   tq->tq_state = TQ_S_CREATED;
> > >   tq->tq_running = 0;
> > > + tq->tq_barrier = 0;
> > >   tq->tq_nthreads = nthreads;
> > >   tq->tq_name = name;
> > >   tq->tq_flags = flags;
> > > @@ -161,6 +165,7 @@ taskq_destroy(struct taskq *tq)
> > >   panic("unexpected %s tq state %u", tq->tq_name, tq->tq_state);
> > >   }
> > >  
> > > + tq->tq_barrier = 0;
> > >   while (tq->tq_running > 0) {
> > >   wakeup(tq);
> > >   msleep(&tq->tq_running, &tq->tq_mtx, PWAIT, "tqdestroy", 0);
> > > @@ -223,6 +228,7 @@ taskq_barrier(struct taskq *tq)
> > >  
> > >   WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
> > >  
> > > + SET(t.t_flags, TASK_BARRIER);
> > >   task_add(tq, &t);
> > >   cond_wait(&c, "tqbar");
> > >  }
> > > @@ -238,6 +244,7 @@ taskq_del_barrier(struct taskq *tq, stru
> > >   if (task_del(tq, del))
> > >   return;
> > >  
> > > + SET(t.t_flags, TASK_BARRIER);
> > >   task_add(tq, &t);
> > >   cond_wait(&c, "tqbar");
> > >  }
> > > @@ -304,6 +311,7 @@ taskq_next_work(struct taskq *tq, struct
> > >   struct task *next;
> > >  
> > >   mtx_enter(&tq->tq_mtx);
> > > +retry:
> > >   while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
> > >   if (tq->tq_state != TQ_S_RUNNING) {
> > >   mtx_leave(&tq->tq_mtx);
> > > @@ -311,6 +319,17 @@ taskq_next_work(struct taskq *tq, struct
> > >   }
> > >  
> > >   msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
> > > + }
> > > +
> > > + if (ISSET(next->t_flags, TASK_BARRIER)) {
> > > + if (++tq->tq_barrier == tq->tq_nthreads) {
> > > + tq->tq_barrier = 0;
> > > + wakeup(tq);
> > > + } else {
> > > + while (tq->tq_barrier > 0)
> > > + msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
> > > + goto retry;
> > > + }
> > >   }
> > >  
> > >   TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
> > > Index: sys/task.h
> > > ===================================================================
> > > RCS file: /cvs/src/sys/sys/task.h,v
> > > retrieving revision 1.15
> > > diff -u -p -r1.15 task.h
> > > --- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
> > > +++ sys/task.h 11 Jun 2019 18:54:39 -0000
> > > @@ -31,6 +31,7 @@ struct task {
> > >  };
> > >  
> > >  #define TASK_ONQUEUE 1
> > > +#define TASK_BARRIER 2
> > >  
> > >  TAILQ_HEAD(task_list, task);
> > >  
> > >
> >
>

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Mark Kettenis
> Date: Fri, 5 Jul 2019 14:20:40 +1000
> From: Jonathan Gray <[hidden email]>
>
> On Wed, Jun 12, 2019 at 09:15:36AM +0200, Mark Kettenis wrote:
> > > Date: Wed, 12 Jun 2019 17:04:10 +1000
> > > From: Jonathan Gray <[hidden email]>
> > >
> > > On Tue, Jun 11, 2019 at 09:10:46PM +0200, Mark Kettenis wrote:
> > > > The drm(4) codebase really needs multi-threaded task queues since the
> > > > code has tasks that wait for the completion of other tasks that are
> > > > submitted to the same task queue.  Thank you Linux...
> > > >
> > > > Unfortunately the code also needs to wait for the completion of
> > > > submitted tasks from other threads.  This is implemented using
> > > > task_barrier(9).  But unfortunately our current implementation only
> > > > works for single-threaded task queues.
> > > >
> > > > The diff below fixes this by marking the barrier task with a flag and
> > > > making sure that all threads of the task queue are synchronized.
> > > > This is achieved through a TASK_BARRIER flag that simply blocks the
> > > > threads until the last unblocked thread sees the flag and executes the
> > > > task.
> > > >
> > > > The diff also starts 4 threads for each workqueue that gets created by
> > > > the drm(4) layer.  The number 4 is a bit arbitrary but it is the
> > > > number of threads that Linux creates per CPU for a so-called "unbound"
> > > > workqueue which hopefully is enough to always make progress.
> > >
> > > Ignoring taskletq and systq use in burner_task/switchtask across
> > > drivers and the local only backlight_schedule_update_status(), was it
> > > intentional to exclude:
> > >
> > > workqueue.h: alloc_workqueue()
> > > struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
> > > workqueue.h: alloc_ordered_workqueue()
> > > struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
> > > workqueue.h:
> > > #define system_power_efficient_wq ((struct workqueue_struct *)systq)
> >
> > Not entirely.  But I don't want to spawn too many threads...  If this
> > diff seems to work, we might want to tweak the numbers a bit to see
> > what works best.  Whatever we do, the ordered workqueues need to stay
> > single-threaded.
>
> While the non-drm parts of this went in the drm_linux.c and intel_hotplug.c
> parts have not.  Was there a particular reason these changes didn't go in?

I wanted to check whether we could get away with using less threads
for the queues but I didn't get around to doing it.  But I guess I
could just commit this and do some additional tuning later.

You're ok with the change going in?

> > > > Please test.  If you experience a "hang" with this diff, please try to
> > > > log in to the machine remotely over ssh and send me and jsg@ the
> > > > output of "ps -AHwlk".
> > > >
> > > > Thanks,
> > > >
> > > > Mark
> > > >
> > > >
> > > > Index: dev/pci/drm/drm_linux.c
> > > > ===================================================================
> > > > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > > > retrieving revision 1.38
> > > > diff -u -p -r1.38 drm_linux.c
> > > > --- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
> > > > +++ dev/pci/drm/drm_linux.c 11 Jun 2019 18:54:38 -0000
> > > > @@ -1399,15 +1399,15 @@ drm_linux_init(void)
> > > >  {
> > > >   if (system_wq == NULL) {
> > > >   system_wq = (struct workqueue_struct *)
> > > > -    taskq_create("drmwq", 1, IPL_HIGH, 0);
> > > > +    taskq_create("drmwq", 4, IPL_HIGH, 0);
> > > >   }
> > > >   if (system_unbound_wq == NULL) {
> > > >   system_unbound_wq = (struct workqueue_struct *)
> > > > -    taskq_create("drmubwq", 1, IPL_HIGH, 0);
> > > > +    taskq_create("drmubwq", 4, IPL_HIGH, 0);
> > > >   }
> > > >   if (system_long_wq == NULL) {
> > > >   system_long_wq = (struct workqueue_struct *)
> > > > -    taskq_create("drmlwq", 1, IPL_HIGH, 0);
> > > > +    taskq_create("drmlwq", 4, IPL_HIGH, 0);
> > > >   }
> > > >  
> > > >   if (taskletq == NULL)
> > > > Index: dev/pci/drm/i915/intel_hotplug.c
> > > > ===================================================================
> > > > RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
> > > > retrieving revision 1.2
> > > > diff -u -p -r1.2 intel_hotplug.c
> > > > --- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
> > > > +++ dev/pci/drm/i915/intel_hotplug.c 11 Jun 2019 18:54:38 -0000
> > > > @@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
> > > >   INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
> > > >   INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
> > > >   INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
> > > > - dev_priv->hotplug.poll_init_work.tq = systq;
> > > >   INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
> > > >    intel_hpd_irq_storm_reenable_work);
> > > >  }
> > > > Index: kern/kern_task.c
> > > > ===================================================================
> > > > RCS file: /cvs/src/sys/kern/kern_task.c,v
> > > > retrieving revision 1.25
> > > > diff -u -p -r1.25 kern_task.c
> > > > --- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
> > > > +++ kern/kern_task.c 11 Jun 2019 18:54:39 -0000
> > > > @@ -43,6 +43,7 @@ struct taskq {
> > > >   TQ_S_DESTROYED
> > > >   } tq_state;
> > > >   unsigned int tq_running;
> > > > + unsigned int tq_barrier;
> > > >   unsigned int tq_nthreads;
> > > >   unsigned int tq_flags;
> > > >   const char *tq_name;
> > > > @@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
> > > >  struct taskq taskq_sys = {
> > > >   TQ_S_CREATED,
> > > >   0,
> > > > + 0,
> > > >   1,
> > > >   0,
> > > >   taskq_sys_name,
> > > > @@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
> > > >  struct taskq taskq_sys_mp = {
> > > >   TQ_S_CREATED,
> > > >   0,
> > > > + 0,
> > > >   1,
> > > >   TASKQ_MPSAFE,
> > > >   taskq_sys_mp_name,
> > > > @@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
> > > >  
> > > >   tq->tq_state = TQ_S_CREATED;
> > > >   tq->tq_running = 0;
> > > > + tq->tq_barrier = 0;
> > > >   tq->tq_nthreads = nthreads;
> > > >   tq->tq_name = name;
> > > >   tq->tq_flags = flags;
> > > > @@ -161,6 +165,7 @@ taskq_destroy(struct taskq *tq)
> > > >   panic("unexpected %s tq state %u", tq->tq_name, tq->tq_state);
> > > >   }
> > > >  
> > > > + tq->tq_barrier = 0;
> > > >   while (tq->tq_running > 0) {
> > > >   wakeup(tq);
> > > >   msleep(&tq->tq_running, &tq->tq_mtx, PWAIT, "tqdestroy", 0);
> > > > @@ -223,6 +228,7 @@ taskq_barrier(struct taskq *tq)
> > > >  
> > > >   WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
> > > >  
> > > > + SET(t.t_flags, TASK_BARRIER);
> > > >   task_add(tq, &t);
> > > >   cond_wait(&c, "tqbar");
> > > >  }
> > > > @@ -238,6 +244,7 @@ taskq_del_barrier(struct taskq *tq, stru
> > > >   if (task_del(tq, del))
> > > >   return;
> > > >  
> > > > + SET(t.t_flags, TASK_BARRIER);
> > > >   task_add(tq, &t);
> > > >   cond_wait(&c, "tqbar");
> > > >  }
> > > > @@ -304,6 +311,7 @@ taskq_next_work(struct taskq *tq, struct
> > > >   struct task *next;
> > > >  
> > > >   mtx_enter(&tq->tq_mtx);
> > > > +retry:
> > > >   while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
> > > >   if (tq->tq_state != TQ_S_RUNNING) {
> > > >   mtx_leave(&tq->tq_mtx);
> > > > @@ -311,6 +319,17 @@ taskq_next_work(struct taskq *tq, struct
> > > >   }
> > > >  
> > > >   msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
> > > > + }
> > > > +
> > > > + if (ISSET(next->t_flags, TASK_BARRIER)) {
> > > > + if (++tq->tq_barrier == tq->tq_nthreads) {
> > > > + tq->tq_barrier = 0;
> > > > + wakeup(tq);
> > > > + } else {
> > > > + while (tq->tq_barrier > 0)
> > > > + msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
> > > > + goto retry;
> > > > + }
> > > >   }
> > > >  
> > > >   TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
> > > > Index: sys/task.h
> > > > ===================================================================
> > > > RCS file: /cvs/src/sys/sys/task.h,v
> > > > retrieving revision 1.15
> > > > diff -u -p -r1.15 task.h
> > > > --- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
> > > > +++ sys/task.h 11 Jun 2019 18:54:39 -0000
> > > > @@ -31,6 +31,7 @@ struct task {
> > > >  };
> > > >  
> > > >  #define TASK_ONQUEUE 1
> > > > +#define TASK_BARRIER 2
> > > >  
> > > >  TAILQ_HEAD(task_list, task);
> > > >  
> > > >
> > >
> >
>

Reply | Threaded
Open this post in threaded view
|

Re: drm(4), multi-threaded task queues and barriers

Jonathan Gray-11
On Fri, Jul 05, 2019 at 12:20:20PM +0200, Mark Kettenis wrote:

> > Date: Fri, 5 Jul 2019 14:20:40 +1000
> > From: Jonathan Gray <[hidden email]>
> >
> > On Wed, Jun 12, 2019 at 09:15:36AM +0200, Mark Kettenis wrote:
> > > > Date: Wed, 12 Jun 2019 17:04:10 +1000
> > > > From: Jonathan Gray <[hidden email]>
> > > >
> > > > On Tue, Jun 11, 2019 at 09:10:46PM +0200, Mark Kettenis wrote:
> > > > > The drm(4) codebase really needs multi-threaded task queues since the
> > > > > code has tasks that wait for the completion of other tasks that are
> > > > > submitted to the same task queue.  Thank you Linux...
> > > > >
> > > > > Unfortunately the code also needs to wait for the completion of
> > > > > submitted tasks from other threads.  This is implemented using
> > > > > task_barrier(9).  But unfortunately our current implementation only
> > > > > works for single-threaded task queues.
> > > > >
> > > > > The diff below fixes this by marking the barrier task with a flag and
> > > > > making sure that all threads of the task queue are synchronized.
> > > > > This is achieved through a TASK_BARRIER flag that simply blocks the
> > > > > threads until the last unblocked thread sees the flag and executes the
> > > > > task.
> > > > >
> > > > > The diff also starts 4 threads for each workqueue that gets created by
> > > > > the drm(4) layer.  The number 4 is a bit arbitrary but it is the
> > > > > number of threads that Linux creates per CPU for a so-called "unbound"
> > > > > workqueue which hopefully is enough to always make progress.
> > > >
> > > > Ignoring taskletq and systq use in burner_task/switchtask across
> > > > drivers and the local only backlight_schedule_update_status(), was it
> > > > intentional to exclude:
> > > >
> > > > workqueue.h: alloc_workqueue()
> > > > struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
> > > > workqueue.h: alloc_ordered_workqueue()
> > > > struct taskq *tq = taskq_create(name, 1, IPL_TTY, 0);
> > > > workqueue.h:
> > > > #define system_power_efficient_wq ((struct workqueue_struct *)systq)
> > >
> > > Not entirely.  But I don't want to spawn too many threads...  If this
> > > diff seems to work, we might want to tweak the numbers a bit to see
> > > what works best.  Whatever we do, the ordered workqueues need to stay
> > > single-threaded.
> >
> > While the non-drm parts of this went in the drm_linux.c and intel_hotplug.c
> > parts have not.  Was there a particular reason these changes didn't go in?
>
> I wanted to check whether we could get away with using less threads
> for the queues but I didn't get around to doing it.  But I guess I
> could just commit this and do some additional tuning later.
>
> You're ok with the change going in?

Yes, sorry if that wasn't clear.  ok jsg@

>
> > > > > Please test.  If you experience a "hang" with this diff, please try to
> > > > > log in to the machine remotely over ssh and send me and jsg@ the
> > > > > output of "ps -AHwlk".
> > > > >
> > > > > Thanks,
> > > > >
> > > > > Mark
> > > > >
> > > > >
> > > > > Index: dev/pci/drm/drm_linux.c
> > > > > ===================================================================
> > > > > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > > > > retrieving revision 1.38
> > > > > diff -u -p -r1.38 drm_linux.c
> > > > > --- dev/pci/drm/drm_linux.c 9 Jun 2019 12:58:30 -0000 1.38
> > > > > +++ dev/pci/drm/drm_linux.c 11 Jun 2019 18:54:38 -0000
> > > > > @@ -1399,15 +1399,15 @@ drm_linux_init(void)
> > > > >  {
> > > > >   if (system_wq == NULL) {
> > > > >   system_wq = (struct workqueue_struct *)
> > > > > -    taskq_create("drmwq", 1, IPL_HIGH, 0);
> > > > > +    taskq_create("drmwq", 4, IPL_HIGH, 0);
> > > > >   }
> > > > >   if (system_unbound_wq == NULL) {
> > > > >   system_unbound_wq = (struct workqueue_struct *)
> > > > > -    taskq_create("drmubwq", 1, IPL_HIGH, 0);
> > > > > +    taskq_create("drmubwq", 4, IPL_HIGH, 0);
> > > > >   }
> > > > >   if (system_long_wq == NULL) {
> > > > >   system_long_wq = (struct workqueue_struct *)
> > > > > -    taskq_create("drmlwq", 1, IPL_HIGH, 0);
> > > > > +    taskq_create("drmlwq", 4, IPL_HIGH, 0);
> > > > >   }
> > > > >  
> > > > >   if (taskletq == NULL)
> > > > > Index: dev/pci/drm/i915/intel_hotplug.c
> > > > > ===================================================================
> > > > > RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_hotplug.c,v
> > > > > retrieving revision 1.2
> > > > > diff -u -p -r1.2 intel_hotplug.c
> > > > > --- dev/pci/drm/i915/intel_hotplug.c 14 Apr 2019 10:14:52 -0000 1.2
> > > > > +++ dev/pci/drm/i915/intel_hotplug.c 11 Jun 2019 18:54:38 -0000
> > > > > @@ -619,7 +619,6 @@ void intel_hpd_init_work(struct drm_i915
> > > > >   INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
> > > > >   INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
> > > > >   INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
> > > > > - dev_priv->hotplug.poll_init_work.tq = systq;
> > > > >   INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
> > > > >    intel_hpd_irq_storm_reenable_work);
> > > > >  }
> > > > > Index: kern/kern_task.c
> > > > > ===================================================================
> > > > > RCS file: /cvs/src/sys/kern/kern_task.c,v
> > > > > retrieving revision 1.25
> > > > > diff -u -p -r1.25 kern_task.c
> > > > > --- kern/kern_task.c 28 Apr 2019 04:20:40 -0000 1.25
> > > > > +++ kern/kern_task.c 11 Jun 2019 18:54:39 -0000
> > > > > @@ -43,6 +43,7 @@ struct taskq {
> > > > >   TQ_S_DESTROYED
> > > > >   } tq_state;
> > > > >   unsigned int tq_running;
> > > > > + unsigned int tq_barrier;
> > > > >   unsigned int tq_nthreads;
> > > > >   unsigned int tq_flags;
> > > > >   const char *tq_name;
> > > > > @@ -59,6 +60,7 @@ static const char taskq_sys_name[] = "sy
> > > > >  struct taskq taskq_sys = {
> > > > >   TQ_S_CREATED,
> > > > >   0,
> > > > > + 0,
> > > > >   1,
> > > > >   0,
> > > > >   taskq_sys_name,
> > > > > @@ -77,6 +79,7 @@ static const char taskq_sys_mp_name[] =
> > > > >  struct taskq taskq_sys_mp = {
> > > > >   TQ_S_CREATED,
> > > > >   0,
> > > > > + 0,
> > > > >   1,
> > > > >   TASKQ_MPSAFE,
> > > > >   taskq_sys_mp_name,
> > > > > @@ -122,6 +125,7 @@ taskq_create(const char *name, unsigned
> > > > >  
> > > > >   tq->tq_state = TQ_S_CREATED;
> > > > >   tq->tq_running = 0;
> > > > > + tq->tq_barrier = 0;
> > > > >   tq->tq_nthreads = nthreads;
> > > > >   tq->tq_name = name;
> > > > >   tq->tq_flags = flags;
> > > > > @@ -161,6 +165,7 @@ taskq_destroy(struct taskq *tq)
> > > > >   panic("unexpected %s tq state %u", tq->tq_name, tq->tq_state);
> > > > >   }
> > > > >  
> > > > > + tq->tq_barrier = 0;
> > > > >   while (tq->tq_running > 0) {
> > > > >   wakeup(tq);
> > > > >   msleep(&tq->tq_running, &tq->tq_mtx, PWAIT, "tqdestroy", 0);
> > > > > @@ -223,6 +228,7 @@ taskq_barrier(struct taskq *tq)
> > > > >  
> > > > >   WITNESS_CHECKORDER(&tq->tq_lock_object, LOP_NEWORDER, NULL);
> > > > >  
> > > > > + SET(t.t_flags, TASK_BARRIER);
> > > > >   task_add(tq, &t);
> > > > >   cond_wait(&c, "tqbar");
> > > > >  }
> > > > > @@ -238,6 +244,7 @@ taskq_del_barrier(struct taskq *tq, stru
> > > > >   if (task_del(tq, del))
> > > > >   return;
> > > > >  
> > > > > + SET(t.t_flags, TASK_BARRIER);
> > > > >   task_add(tq, &t);
> > > > >   cond_wait(&c, "tqbar");
> > > > >  }
> > > > > @@ -304,6 +311,7 @@ taskq_next_work(struct taskq *tq, struct
> > > > >   struct task *next;
> > > > >  
> > > > >   mtx_enter(&tq->tq_mtx);
> > > > > +retry:
> > > > >   while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
> > > > >   if (tq->tq_state != TQ_S_RUNNING) {
> > > > >   mtx_leave(&tq->tq_mtx);
> > > > > @@ -311,6 +319,17 @@ taskq_next_work(struct taskq *tq, struct
> > > > >   }
> > > > >  
> > > > >   msleep(tq, &tq->tq_mtx, PWAIT, "bored", 0);
> > > > > + }
> > > > > +
> > > > > + if (ISSET(next->t_flags, TASK_BARRIER)) {
> > > > > + if (++tq->tq_barrier == tq->tq_nthreads) {
> > > > > + tq->tq_barrier = 0;
> > > > > + wakeup(tq);
> > > > > + } else {
> > > > > + while (tq->tq_barrier > 0)
> > > > > + msleep(tq, &tq->tq_mtx, PWAIT, "tqblk", 0);
> > > > > + goto retry;
> > > > > + }
> > > > >   }
> > > > >  
> > > > >   TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
> > > > > Index: sys/task.h
> > > > > ===================================================================
> > > > > RCS file: /cvs/src/sys/sys/task.h,v
> > > > > retrieving revision 1.15
> > > > > diff -u -p -r1.15 task.h
> > > > > --- sys/task.h 28 Apr 2019 04:20:40 -0000 1.15
> > > > > +++ sys/task.h 11 Jun 2019 18:54:39 -0000
> > > > > @@ -31,6 +31,7 @@ struct task {
> > > > >  };
> > > > >  
> > > > >  #define TASK_ONQUEUE 1
> > > > > +#define TASK_BARRIER 2
> > > > >  
> > > > >  TAILQ_HEAD(task_list, task);
> > > > >  
> > > > >
> > > >
> > >
> >