kqueue_scan() refactoring


kqueue_scan() refactoring

Martin Pieuchot
Diff below reduces kqueue_scan() to collecting the events on a given
kqueue and makes its caller, sys_kevent(), responsible for the copyout(9).

Apart from the code simplification, this refactoring clearly separates
kqueue_scan() from the syscall logic.  That should allow us to re-use
the function in a different context and to address its need for locking
independently.
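
For reference, the new interface, as declared in the diff below, becomes:

    int kqueue_scan(struct kqueue *kq, struct kevent *kev, int count,
        struct timespec *tsp, struct proc *p, int *errorp);

It returns the number of events collected into the caller-supplied kev
array and reports any error through errorp, instead of doing the
copyout(9) itself.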

Since the number of events that are ready to be collected can be bigger
than the size of the array, the pair kqueue_scan()/copyout() may be
called multiple times.  In that case, successive calls should no longer
block; this is done by passing a zero, but non-NULL, timeout, which
corresponds to a non-blocking scan.
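
Condensed from the sys_kevent() hunk below, the caller-side loop looks
like this (ts, tsp, kev[KQ_NEVENTS] and the SCARG()s as in the diff;
ktrace and cleanup trimmed):

    total = 0;
    error = 0;
    while ((n = SCARG(uap, nevents) - total) > 0) {
        if (n > nitems(kev))
            n = nitems(kev);
        ready = kqueue_scan(kq, kev, n, tsp, p, &error);
        if (ready == 0)
            break;
        /* The caller, not kqueue_scan(), copies events out. */
        error = copyout(kev, SCARG(uap, eventlist) + total,
            sizeof(struct kevent) * ready);
        total += ready;
        if (error || ready < n)
            break;
        /* Later rounds must not sleep: zero, non-NULL timeout. */
        tsp = &ts;
        timespecclear(tsp);
    }
    *retval = total;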

This is the next piece of the ongoing select/poll/kqueue work.
I'd like to be sure it doesn't introduce any regression.

Comments, tests and oks welcome :o)

Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.131
diff -u -p -r1.131 kern_event.c
--- kern/kern_event.c 7 Apr 2020 13:27:51 -0000 1.131
+++ kern/kern_event.c 9 Apr 2020 17:09:58 -0000
@@ -62,9 +62,8 @@ void KQREF(struct kqueue *);
 void KQRELE(struct kqueue *);
 
 int kqueue_sleep(struct kqueue *, struct timespec *);
-int kqueue_scan(struct kqueue *kq, int maxevents,
-    struct kevent *ulistp, struct timespec *timeout,
-    struct proc *p, int *retval);
+int kqueue_scan(struct kqueue *, struct kevent *, int, struct timespec *,
+    struct proc *, int *);
 
 int kqueue_read(struct file *, struct uio *, int);
 int kqueue_write(struct file *, struct uio *, int);
@@ -544,6 +543,7 @@ sys_kevent(struct proc *p, void *v, regi
  struct timespec ts;
  struct timespec *tsp = NULL;
  int i, n, nerrors, error;
+ int ready, total;
  struct kevent kev[KQ_NEVENTS];
 
  if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
@@ -612,10 +612,32 @@ sys_kevent(struct proc *p, void *v, regi
 
  KQREF(kq);
  FRELE(fp, p);
- error = kqueue_scan(kq, SCARG(uap, nevents), SCARG(uap, eventlist),
-    tsp, p, &n);
+ /*
+ * Collect as many events as we can.  The timeout on successive
+ * loops is disabled (kqueue_scan() becomes non-blocking).
+ */
+ total = 0;
+ error = 0;
+ while ((n = SCARG(uap, nevents) - total) > 0) {
+ if (n > nitems(kev))
+ n = nitems(kev);
+ ready = kqueue_scan(kq, kev, n, tsp, p, &error);
+ if (ready == 0)
+ break;
+ error = copyout(kev, SCARG(uap, eventlist) + total,
+    sizeof(struct kevent) * ready);
+ total += ready;
+ /*
+ * Stop if there was an error or if we had enough
+ * place to collect all events that were ready.
+ */
+ if (error || ready < n)
+ break;
+ tsp = &ts; /* successive loops non-blocking */
+ timespecclear(tsp);
+ }
  KQRELE(kq);
- *retval = n;
+ *retval = total;
  return (error);
 
  done:
@@ -869,18 +891,18 @@ kqueue_sleep(struct kqueue *kq, struct t
  return (error);
 }
 
+/*
+ * Scan the kqueue, blocking if necessary until the target time is reached.
+ * If tsp is NULL we block indefinitely.  If tsp->ts_secs/nsecs are both
+ * 0 we do not block at all.
+ */
 int
-kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp,
-    struct timespec *tsp, struct proc *p, int *retval)
+kqueue_scan(struct kqueue *kq, struct kevent *kev, int count,
+    struct timespec *tsp, struct proc *p, int *errorp)
 {
- struct kevent *kevp;
  struct knote mend, mstart, *kn;
- int s, count, nkev = 0, error = 0;
- struct kevent kev[KQ_NEVENTS];
-
- count = maxevents;
- if (count == 0)
- goto done;
+ int s, nkev = 0, error = 0;
+ struct kevent *kevp = kev;
 
  memset(&mstart, 0, sizeof(mstart));
  memset(&mend, 0, sizeof(mend));
@@ -891,7 +913,6 @@ retry:
  goto done;
  }
 
- kevp = &kev[0];
  s = splhigh();
  if (kq->kq_count == 0) {
  if (tsp != NULL && !timespecisset(tsp)) {
@@ -910,6 +931,9 @@ retry:
  goto done;
  }
 
+ /*
+ * Collect events
+ */
  mstart.kn_filter = EVFILT_MARKER;
  mstart.kn_status = KN_PROCESSING;
  TAILQ_INSERT_HEAD(&kq->kq_head, &mstart, kn_tqe);
@@ -919,14 +943,8 @@ retry:
  while (count) {
  kn = TAILQ_NEXT(&mstart, kn_tqe);
  if (kn->kn_filter == EVFILT_MARKER) {
- if (kn == &mend) {
- TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe);
- TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe);
- splx(s);
- if (count == maxevents)
- goto retry;
- goto done;
- }
+ if (kn == &mend)
+ break;
 
  /* Move start marker past another thread's marker. */
  TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe);
@@ -958,6 +976,11 @@ retry:
  *kevp = kn->kn_kevent;
  kevp++;
  nkev++;
+ count--;
+
+ /*
+ * Post-event action on the note
+ */
  if (kn->kn_flags & EV_ONESHOT) {
  splx(s);
  kn->kn_fop->f_detach(kn);
@@ -983,37 +1006,21 @@ retry:
  knote_release(kn);
  }
  kqueue_check(kq);
- count--;
- if (nkev == KQ_NEVENTS) {
- splx(s);
-#ifdef KTRACE
- if (KTRPOINT(p, KTR_STRUCT))
- ktrevent(p, kev, nkev);
-#endif
- error = copyout(kev, ulistp,
-    sizeof(struct kevent) * nkev);
- ulistp += nkev;
- nkev = 0;
- kevp = &kev[0];
- s = splhigh();
- if (error)
- break;
- }
  }
  TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe);
  TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe);
  splx(s);
+ if (nkev == 0)
+ goto retry;
 done:
  if (nkev != 0) {
 #ifdef KTRACE
  if (KTRPOINT(p, KTR_STRUCT))
  ktrevent(p, kev, nkev);
 #endif
- error = copyout(kev, ulistp,
-    sizeof(struct kevent) * nkev);
  }
- *retval = maxevents - count;
- return (error);
+ *errorp = error;
+ return (nkev);
 }
 
 /*


Re: kqueue_scan() refactoring

Martin Pieuchot
On 10/04/20(Fri) 14:43, Martin Pieuchot wrote:

> Diff below reduces kqueue_scan() to collecting the events on a given
> kqueue and makes its caller, sys_kevent(), responsible for the copyout(9).
>
> Apart from the code simplification, this refactoring clearly separates
> kqueue_scan() from the syscall logic.  That should allow us to re-use
> the function in a different context and to address its need for locking
> independently.
>
> Since the number of events that are ready to be collected can be bigger
> than the size of the array, the pair kqueue_scan()/copyout() may be
> called multiple times.  In that case, successive calls should no longer
> block; this is done by passing a zero, but non-NULL, timeout, which
> corresponds to a non-blocking scan.
>
> This is the next piece of the ongoing select/poll/kqueue work.
> I'd like to be sure it doesn't introduce any regression.

Updated diff based on recent changes and incorporating feedback from
visa@.

Comments?  Oks?
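
Condensed from the updated sys_kevent() hunk below, the collection loop
is now bracketed by the scan-state helpers, and kqueue_scan() takes the
state instead of the kqueue directly (copyout/ktrace elided):

    kqueue_scan_setup(&scan, kq);
    while ((n = SCARG(uap, nevents) - total) > 0) {
        if (n > nitems(kev))
            n = nitems(kev);
        ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
        if (ready == 0)
            break;
        /* copyout() and ktrevent() happen here, in the caller. */
        total += ready;
        if (error || ready < n)
            break;
        tsp = &ts;    /* successive loops non-blocking */
        timespecclear(tsp);
    }
    kqueue_scan_finish(&scan);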

Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.132
diff -u -p -r1.132 kern_event.c
--- kern/kern_event.c 17 May 2020 10:53:14 -0000 1.132
+++ kern/kern_event.c 25 May 2020 11:54:08 -0000
@@ -62,9 +62,6 @@ void KQREF(struct kqueue *);
 void KQRELE(struct kqueue *);
 
 int kqueue_sleep(struct kqueue *, struct timespec *);
-int kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
-    struct kevent *ulistp, struct timespec *timeout,
-    struct proc *p, int *retval);
 
 int kqueue_read(struct file *, struct uio *, int);
 int kqueue_write(struct file *, struct uio *, int);
@@ -545,6 +542,7 @@ sys_kevent(struct proc *p, void *v, regi
  struct timespec ts;
  struct timespec *tsp = NULL;
  int i, n, nerrors, error;
+ int ready, total;
  struct kevent kev[KQ_NEVENTS];
 
  if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
@@ -573,9 +571,9 @@ sys_kevent(struct proc *p, void *v, regi
  kq = fp->f_data;
  nerrors = 0;
 
- while (SCARG(uap, nchanges) > 0) {
- n = SCARG(uap, nchanges) > KQ_NEVENTS ?
-    KQ_NEVENTS : SCARG(uap, nchanges);
+ while ((n = SCARG(uap, nchanges)) > 0) {
+ if (n > nitems(kev))
+ n = nitems(kev);
  error = copyin(SCARG(uap, changelist), kev,
     n * sizeof(struct kevent));
  if (error)
@@ -611,14 +609,41 @@ sys_kevent(struct proc *p, void *v, regi
  goto done;
  }
 
+
  KQREF(kq);
  FRELE(fp, p);
+ /*
+ * Collect as many events as we can.  The timeout on successive
+ * loops is disabled (kqueue_scan() becomes non-blocking).
+ */
+ total = 0;
+ error = 0;
  kqueue_scan_setup(&scan, kq);
- error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist),
-    tsp, p, &n);
+ while ((n = SCARG(uap, nevents) - total) > 0) {
+ if (n > nitems(kev))
+ n = nitems(kev);
+ ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
+ if (ready == 0)
+ break;
+ error = copyout(kev, SCARG(uap, eventlist) + total,
+    sizeof(struct kevent) * ready);
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_STRUCT))
+ ktrevent(p, kev, ready);
+#endif
+ total += ready;
+ /*
+ * Stop if there was an error or if we had enough
+ * place to collect all events that were ready.
+ */
+ if (error || ready < n)
+ break;
+ tsp = &ts; /* successive loops non-blocking */
+ timespecclear(tsp);
+ }
  kqueue_scan_finish(&scan);
  KQRELE(kq);
- *retval = n;
+ *retval = total;
  return (error);
 
  done:
@@ -872,15 +897,18 @@ kqueue_sleep(struct kqueue *kq, struct t
  return (error);
 }
 
+/*
+ * Scan the kqueue, blocking if necessary until the target time is reached.
+ * If tsp is NULL we block indefinitely.  If tsp->ts_secs/nsecs are both
+ * 0 we do not block at all.
+ */
 int
 kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
-    struct kevent *ulistp, struct timespec *tsp, struct proc *p, int *retval)
+    struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp)
 {
- struct kevent *kevp;
  struct knote *kn;
  struct kqueue *kq = scan->kqs_kq;
  int s, count, nkev = 0, error = 0;
- struct kevent kev[KQ_NEVENTS];
 
  count = maxevents;
  if (count == 0)
@@ -892,7 +920,6 @@ retry:
  goto done;
  }
 
- kevp = &kev[0];
  s = splhigh();
  if (kq->kq_count == 0) {
  if ((tsp != NULL && !timespecisset(tsp)) ||
@@ -933,14 +960,8 @@ retry:
  while (count) {
  kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
  if (kn->kn_filter == EVFILT_MARKER) {
- if (kn == &scan->kqs_end) {
- TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start,
-    kn_tqe);
- splx(s);
- if (scan->kqs_nevent == 0)
- goto retry;
- goto done;
- }
+ if (kn == &scan->kqs_end)
+ break;
 
  /* Move start marker past another thread's marker. */
  TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
@@ -976,6 +997,9 @@ retry:
  count--;
  scan->kqs_nevent++;
 
+ /*
+ * Post-event action on the note
+ */
  if (kn->kn_flags & EV_ONESHOT) {
  splx(s);
  kn->kn_fop->f_detach(kn);
@@ -1001,35 +1025,14 @@ retry:
  knote_release(kn);
  }
  kqueue_check(kq);
- if (nkev == KQ_NEVENTS) {
- splx(s);
-#ifdef KTRACE
- if (KTRPOINT(p, KTR_STRUCT))
- ktrevent(p, kev, nkev);
-#endif
- error = copyout(kev, ulistp,
-    sizeof(struct kevent) * nkev);
- ulistp += nkev;
- nkev = 0;
- kevp = &kev[0];
- s = splhigh();
- if (error)
- break;
- }
  }
  TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
  splx(s);
+ if (scan->kqs_nevent == 0)
+ goto retry;
 done:
- if (nkev != 0) {
-#ifdef KTRACE
- if (KTRPOINT(p, KTR_STRUCT))
- ktrevent(p, kev, nkev);
-#endif
- error = copyout(kev, ulistp,
-    sizeof(struct kevent) * nkev);
- }
- *retval = maxevents - count;
- return (error);
+ *errorp = error;
+ return (nkev);
 }
 
 void
Index: sys/event.h
===================================================================
RCS file: /cvs/src/sys/sys/event.h,v
retrieving revision 1.37
diff -u -p -r1.37 event.h
--- sys/event.h 17 May 2020 10:53:14 -0000 1.37
+++ sys/event.h 25 May 2020 11:34:34 -0000
@@ -211,6 +211,8 @@ extern int kqueue_register(struct kqueue
     struct kevent *kev, struct proc *p);
 extern void kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *);
 extern void kqueue_scan_finish(struct kqueue_scan_state *);
+extern int kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,
+    struct timespec *, struct proc *, int *);
 extern int filt_seltrue(struct knote *kn, long hint);
 extern int seltrue_kqfilter(dev_t, struct knote *);
 extern void klist_insert(struct klist *, struct knote *);