ioctl(2) & fcntl(2) tweaks

classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

ioctl(2) & fcntl(2) tweaks

Martin Pieuchot
The next important step towards removing the KERNEL_LOCK() from the
kernel is to be able to execute ioctl(2)s without it.

The first area that can benefit from this is obviously the Network
Stack.  tb@ and kn@ are working on this area so they'll soon need a
way to test really test their diffs.

But some pseudo-devices could also benefit from this.  I'm thinking
at vmm(4), bpf(4) and pf(4) in a first place...

So here's a diff that remove some duplicate logic from the generics
sys_ioctl() and sys_fcntl().  Once this is in we can start pushing
the KERNEL_LOCK() down, inside every `fo_ioctl'.

While here I'm also getting rid of M_IOCTLOPS, all other places where
an allocation is needed for a syscall are using M_TEMP.  So less is
more.

Ok?

Index: kern/kern_descrip.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.175
diff -u -p -r1.175 kern_descrip.c
--- kern/kern_descrip.c 2 Jul 2018 14:36:33 -0000 1.175
+++ kern/kern_descrip.c 3 Jul 2018 10:26:47 -0000
@@ -484,14 +484,6 @@ restart:
  break;
 
  case F_GETOWN:
- if (fp->f_type == DTYPE_SOCKET) {
- *retval = ((struct socket *)fp->f_data)->so_pgid;
- break;
- }
- if (fp->f_type == DTYPE_PIPE) {
- *retval = ((struct pipe *)fp->f_data)->pipe_pgid;
- break;
- }
  tmp = 0;
  error = (*fp->f_ops->fo_ioctl)
  (fp, TIOCGPGRP, (caddr_t)&tmp, p);
@@ -499,20 +491,6 @@ restart:
  break;
 
  case F_SETOWN:
- if (fp->f_type == DTYPE_SOCKET) {
- struct socket *so = fp->f_data;
-
- so->so_pgid = (long)SCARG(uap, arg);
- so->so_siguid = p->p_ucred->cr_ruid;
- so->so_sigeuid = p->p_ucred->cr_uid;
- break;
- }
- if (fp->f_type == DTYPE_PIPE) {
- struct pipe *mpipe = fp->f_data;
-
- mpipe->pipe_pgid = (long)SCARG(uap, arg);
- break;
- }
  if ((long)SCARG(uap, arg) <= 0) {
  SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg));
  } else {
Index: kern/sys_generic.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.119
diff -u -p -r1.119 sys_generic.c
--- kern/sys_generic.c 8 May 2018 08:53:41 -0000 1.119
+++ kern/sys_generic.c 3 Jul 2018 10:20:38 -0000
@@ -389,7 +389,7 @@ sys_ioctl(struct proc *p, void *v, regis
  syscallarg(void *) data;
  } */ *uap = v;
  struct file *fp;
- struct filedesc *fdp;
+ struct filedesc *fdp = p->p_fd;
  u_long com = SCARG(uap, com);
  int error = 0;
  u_int size;
@@ -398,7 +398,6 @@ sys_ioctl(struct proc *p, void *v, regis
 #define STK_PARAMS 128
  long long stkbuf[STK_PARAMS / sizeof(long long)];
 
- fdp = p->p_fd;
  if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL)
  return (EBADF);
 
@@ -437,7 +436,7 @@ sys_ioctl(struct proc *p, void *v, regis
  goto out;
  }
  if (size > sizeof (stkbuf)) {
- memp = malloc(size, M_IOCTLOPS, M_WAITOK);
+ memp = malloc(size, M_TEMP, M_WAITOK);
  data = memp;
  } else
  data = (caddr_t)stkbuf;
@@ -478,15 +477,7 @@ sys_ioctl(struct proc *p, void *v, regis
 
  case FIOSETOWN:
  tmp = *(int *)data;
- if (fp->f_type == DTYPE_SOCKET) {
- struct socket *so = fp->f_data;
 
- so->so_pgid = tmp;
- so->so_siguid = p->p_ucred->cr_ruid;
- so->so_sigeuid = p->p_ucred->cr_uid;
- error = 0;
- break;
- }
  if (tmp <= 0) {
  tmp = -tmp;
  } else {
@@ -502,11 +493,6 @@ sys_ioctl(struct proc *p, void *v, regis
  break;
 
  case FIOGETOWN:
- if (fp->f_type == DTYPE_SOCKET) {
- error = 0;
- *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
- break;
- }
  error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
  *(int *)data = -*(int *)data;
  break;
@@ -523,7 +509,7 @@ sys_ioctl(struct proc *p, void *v, regis
  error = copyout(data, SCARG(uap, data), size);
 out:
  FRELE(fp, p);
- free(memp, M_IOCTLOPS, size);
+ free(memp, M_TEMP, size);
  return (error);
 }
 
Index: kern/sys_pipe.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_pipe.c,v
retrieving revision 1.81
diff -u -p -r1.81 sys_pipe.c
--- kern/sys_pipe.c 18 Jun 2018 09:15:05 -0000 1.81
+++ kern/sys_pipe.c 3 Jul 2018 10:26:34 -0000
@@ -665,6 +665,8 @@ pipe_ioctl(struct file *fp, u_long cmd,
  *(int *)data = mpipe->pipe_buffer.cnt;
  return (0);
 
+ case TIOCSPGRP:
+ /* FALLTHROUGH */
  case SIOCSPGRP:
  mpipe->pipe_pgid = *(int *)data;
  return (0);
@@ -672,6 +674,10 @@ pipe_ioctl(struct file *fp, u_long cmd,
  case SIOCGPGRP:
  *(int *)data = mpipe->pipe_pgid;
  return (0);
+
+ case TIOCGPGRP:
+ *(int *)data = -mpipe->pipe_pgid;
+ break;
 
  }
  return (ENOTTY);
Index: kern/sys_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.38
diff -u -p -r1.38 sys_socket.c
--- kern/sys_socket.c 6 Jun 2018 06:55:22 -0000 1.38
+++ kern/sys_socket.c 3 Jul 2018 10:24:57 -0000
@@ -109,10 +109,16 @@ soo_ioctl(struct file *fp, u_long cmd, c
  *(int *)data = so->so_rcv.sb_datacc;
  break;
 
+ case TIOCSPGRP:
+ /* FALLTHROUGH */
  case SIOCSPGRP:
  so->so_pgid = *(int *)data;
  so->so_siguid = p->p_ucred->cr_ruid;
  so->so_sigeuid = p->p_ucred->cr_uid;
+ break;
+
+ case TIOCGPGRP:
+ *(int *)data = -so->so_pgid;
  break;
 
  case SIOCGPGRP:

Reply | Threaded
Open this post in threaded view
|

Re: ioctl(2) & fcntl(2) tweaks

Theo de Raadt-2
> While here I'm also getting rid of M_IOCTLOPS, all other places where
> an allocation is needed for a syscall are using M_TEMP.  So less is
> more.

The reason for this malloc object typing is (a) an lookup optimization
for KMEMSTATS, and (2) to catch certain types of bugs.  It has also been
used to isolate observed leaks to particular code sequences.  Basically
you argue the time for that is over?

I'm not so sure.

But, even more sure this change shouldn't be in the same diff.



 

Reply | Threaded
Open this post in threaded view
|

Re: ioctl(2) & fcntl(2) tweaks

Theo Buehler-5
In reply to this post by Martin Pieuchot
On Tue, Jul 03, 2018 at 12:38:37PM +0200, Martin Pieuchot wrote:

> The next important step towards removing the KERNEL_LOCK() from the
> kernel is to be able to execute ioctl(2)s without it.
>
> The first area that can benefit from this is obviously the Network
> Stack.  tb@ and kn@ are working on this area so they'll soon need a
> way to test really test their diffs.
>
> But some pseudo-devices could also benefit from this.  I'm thinking
> at vmm(4), bpf(4) and pf(4) in a first place...
>
> So here's a diff that remove some duplicate logic from the generics
> sys_ioctl() and sys_fcntl().  Once this is in we can start pushing
> the KERNEL_LOCK() down, inside every `fo_ioctl'.
>
> While here I'm also getting rid of M_IOCTLOPS, all other places where
> an allocation is needed for a syscall are using M_TEMP.  So less is
> more.
>
> Ok?

The M_IOCTLOPS change seems to require more discussion.

ok for the rest.

Reply | Threaded
Open this post in threaded view
|

Re: ioctl(2) & fcntl(2) tweaks

Martin Pieuchot
In reply to this post by Martin Pieuchot
On 03/07/18(Tue) 12:38, Martin Pieuchot wrote:

> The next important step towards removing the KERNEL_LOCK() from the
> kernel is to be able to execute ioctl(2)s without it.
>
> The first area that can benefit from this is obviously the Network
> Stack.  tb@ and kn@ are working on this area so they'll soon need a
> way to test really test their diffs.
>
> But some pseudo-devices could also benefit from this.  I'm thinking
> at vmm(4), bpf(4) and pf(4) in a first place...
>
> So here's a diff that remove some duplicate logic from the generics
> sys_ioctl() and sys_fcntl().  Once this is in we can start pushing
> the KERNEL_LOCK() down, inside every `fo_ioctl'.

Updated diff on top of recent changes.  This one doesn't include the
M_IOCTLOPS change.

It also keeps the previous semantic allowing a negative value for pgid
for sockets.  I'm also adding this ability for the FIOSETOWN ioctl(2)
issued on pipes to be coherent with fcntl(2)'s F_SETOWN.

Ok?

Index: kern/kern_descrip.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.176
diff -u -p -r1.176 kern_descrip.c
--- kern/kern_descrip.c 7 Jul 2018 16:14:40 -0000 1.176
+++ kern/kern_descrip.c 9 Jul 2018 08:44:22 -0000
@@ -484,14 +484,6 @@ restart:
  break;
 
  case F_GETOWN:
- if (fp->f_type == DTYPE_SOCKET) {
- *retval = ((struct socket *)fp->f_data)->so_pgid;
- break;
- }
- if (fp->f_type == DTYPE_PIPE) {
- *retval = ((struct pipe *)fp->f_data)->pipe_pgid;
- break;
- }
  tmp = 0;
  error = (*fp->f_ops->fo_ioctl)
  (fp, TIOCGPGRP, (caddr_t)&tmp, p);
@@ -500,21 +492,9 @@ restart:
 
  case F_SETOWN:
  tmp = (long)SCARG(uap, arg);
- if (fp->f_type == DTYPE_SOCKET) {
- struct socket *so = fp->f_data;
-
- so->so_pgid = tmp;
- so->so_siguid = p->p_ucred->cr_ruid;
- so->so_sigeuid = p->p_ucred->cr_uid;
- break;
- }
- if (fp->f_type == DTYPE_PIPE) {
- struct pipe *mpipe = fp->f_data;
-
- mpipe->pipe_pgid = tmp;
- break;
- }
- if (tmp <= 0) {
+ if (fp->f_type == DTYPE_SOCKET || fp->f_type == DTYPE_PIPE) {
+ /* nothing */
+ } else if (tmp <= 0) {
  tmp = -tmp;
  } else {
  struct process *pr1 = prfind(tmp);
Index: kern/sys_generic.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.119
diff -u -p -r1.119 sys_generic.c
--- kern/sys_generic.c 8 May 2018 08:53:41 -0000 1.119
+++ kern/sys_generic.c 9 Jul 2018 08:44:18 -0000
@@ -389,7 +389,7 @@ sys_ioctl(struct proc *p, void *v, regis
  syscallarg(void *) data;
  } */ *uap = v;
  struct file *fp;
- struct filedesc *fdp;
+ struct filedesc *fdp = p->p_fd;
  u_long com = SCARG(uap, com);
  int error = 0;
  u_int size;
@@ -398,7 +398,6 @@ sys_ioctl(struct proc *p, void *v, regis
 #define STK_PARAMS 128
  long long stkbuf[STK_PARAMS / sizeof(long long)];
 
- fdp = p->p_fd;
  if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL)
  return (EBADF);
 
@@ -478,16 +477,10 @@ sys_ioctl(struct proc *p, void *v, regis
 
  case FIOSETOWN:
  tmp = *(int *)data;
- if (fp->f_type == DTYPE_SOCKET) {
- struct socket *so = fp->f_data;
 
- so->so_pgid = tmp;
- so->so_siguid = p->p_ucred->cr_ruid;
- so->so_sigeuid = p->p_ucred->cr_uid;
- error = 0;
- break;
- }
- if (tmp <= 0) {
+ if (fp->f_type == DTYPE_SOCKET || fp->f_type == DTYPE_PIPE) {
+ /* nothing */
+ } else if (tmp <= 0) {
  tmp = -tmp;
  } else {
  struct process *pr = prfind(tmp);
@@ -502,11 +495,6 @@ sys_ioctl(struct proc *p, void *v, regis
  break;
 
  case FIOGETOWN:
- if (fp->f_type == DTYPE_SOCKET) {
- error = 0;
- *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
- break;
- }
  error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
  *(int *)data = -*(int *)data;
  break;
Index: kern/sys_pipe.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_pipe.c,v
retrieving revision 1.81
diff -u -p -r1.81 sys_pipe.c
--- kern/sys_pipe.c 18 Jun 2018 09:15:05 -0000 1.81
+++ kern/sys_pipe.c 9 Jul 2018 08:34:31 -0000
@@ -665,6 +665,8 @@ pipe_ioctl(struct file *fp, u_long cmd,
  *(int *)data = mpipe->pipe_buffer.cnt;
  return (0);
 
+ case TIOCSPGRP:
+ /* FALLTHROUGH */
  case SIOCSPGRP:
  mpipe->pipe_pgid = *(int *)data;
  return (0);
@@ -672,6 +674,10 @@ pipe_ioctl(struct file *fp, u_long cmd,
  case SIOCGPGRP:
  *(int *)data = mpipe->pipe_pgid;
  return (0);
+
+ case TIOCGPGRP:
+ *(int *)data = -mpipe->pipe_pgid;
+ break;
 
  }
  return (ENOTTY);
Index: kern/sys_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.38
diff -u -p -r1.38 sys_socket.c
--- kern/sys_socket.c 6 Jun 2018 06:55:22 -0000 1.38
+++ kern/sys_socket.c 9 Jul 2018 08:34:31 -0000
@@ -109,10 +109,16 @@ soo_ioctl(struct file *fp, u_long cmd, c
  *(int *)data = so->so_rcv.sb_datacc;
  break;
 
+ case TIOCSPGRP:
+ /* FALLTHROUGH */
  case SIOCSPGRP:
  so->so_pgid = *(int *)data;
  so->so_siguid = p->p_ucred->cr_ruid;
  so->so_sigeuid = p->p_ucred->cr_uid;
+ break;
+
+ case TIOCGPGRP:
+ *(int *)data = -so->so_pgid;
  break;
 
  case SIOCGPGRP: