NFS socket use after free during reboot

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

NFS socket use after free during reboot

Alexander Bluhm
Hi,

When rebooting the NFS client while the NFS file system is actively
used, the kernel crashes.  The socket at 0xd73c2d9c is filled with
dead beef, so it is a use after free.  It is an i386 kernel built
today.

bluhm

root@ot2:.../~# find /mount >/dev/null & sleep 5; reboot -q
[1] 9698
syncing disks... uvm_fault(0xd72afc7c, 0x1ff11000, 0, 1) -> e
kernel: page fault trap, code=0
Stopped at      sblock+0x12:    movl    0x4(%eax),%eax
ddb{0}> trace
sblock(d73c2d9c,d73c2df0,1) at sblock+0x12
soreceive(d73c2d9c,0,f548d818,f548d884,0,f548d804,0) at soreceive+0x271
nfs_receive(d7471f7c,f548d87c,f548d884) at nfs_receive+0xb1
nfs_reply(d7471f7c) at nfs_reply+0x62
nfs_request(d6d1f3c4,10,f548d970) at nfs_request+0x24d
nfs_readdirrpc(d6d1f3c4,f548d9f8,d7499120,f548d9ec) at nfs_readdirrpc+0x1dc
nfs_readdir(f548dab0) at nfs_readdir+0x227
VOP_READDIR(d6d1f3c4,f548daf8,d7499120,f548daec) at VOP_READDIR+0x42
sys_getdents(d71372dc,f548db68,f548db60) at sys_getdents+0x118
syscall() at syscall+0x204
--- syscall (number 0) ---
end of kernel
0x78ef0480:
ddb{0}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 47525   49437  73751      0  2         0x3                reboot
* 9698  299089  73751      0  7    0x100003                find
 73751  193388      1      0  3    0x10008b  pause         ksh
 26295  320696      1      0  3    0x100083  ttyin         getty
 62718  326479      1      0  3    0x100083  ttyin         getty
  6755  452271      1      0  3    0x100083  ttyin         getty
 62489  430851      1      0  3    0x100083  ttyin         getty
 13303  429205      1      0  3    0x100083  ttyin         getty
 97097  367369      1      0  3    0x100098  poll          cron
 38250  509671      1     99  3    0x100090  poll          sndiod
   367  406650      1    110  3    0x100090  poll          sndiod
 21810  404127      1      0  3    0x100090  kqread        inetd
 98071  120936  58301     95  3    0x100092  kqread        smtpd
 69756   87980  58301    103  3    0x100092  kqread        smtpd
 36661  383137  58301     95  3    0x100092  kqread        smtpd
 73070  332826  58301     95  3    0x100092  kqread        smtpd
 39972   98572  58301     95  3    0x100092  kqread        smtpd
 79819  455669  58301     95  3    0x100092  kqread        smtpd
 58301  220916      1      0  3    0x100080  kqread        smtpd
 45347  311418      1      0  3        0x80  select        sshd
 69335  244689      0      0  3     0x14200  acct          acct
 78078  383503      0      0  3     0x14280  nfsidl        nfsio
 67499  120783      0      0  3     0x14280  nfsidl        nfsio
  6389   67663      0      0  3     0x14280  nfsidl        nfsio
 96664  329282      0      0  3     0x14280  nfsidl        nfsio
 89233   98160      1      0  3    0x100080  poll          ntpd
 16545  425860  94501     83  3    0x100092  poll          ntpd
 94501  456716      1     83  3    0x100092  poll          ntpd
 89114  244638  64496     74  3    0x100092  bpf           pflogd
 64496  138057      1      0  3        0x80  netio         pflogd
 33873    3141   2011     73  2    0x100090                syslogd
  2011  167758      1      0  3    0x100082  netio         syslogd
 14968  488435      1     77  3    0x100090  poll          dhclient
 20238  513767      1      0  3        0x80  poll          dhclient
 99266   92995  75879    115  3    0x100092  kqread        slaacd
 12302   94011  75879    115  3    0x100092  kqread        slaacd
 75879  477107      1      0  3        0x80  kqread        slaacd
 94668  139960      0      0  3     0x14200  pgzero        zerothread
 88879  391716      0      0  3     0x14200  aiodoned      aiodoned
 37996  199474      0      0  3     0x14200  syncer        update
 52467  413889      0      0  3     0x14200  cleaner       cleaner
 52168  367270      0      0  3     0x14200  reaper        reaper
 90849  369485      0      0  3     0x14200  pgdaemon      pagedaemon
 68700   48024      0      0  3     0x14200  bored         crynlk
 37778  144411      0      0  3     0x14200  bored         crypto
 31589  214320      0      0  3     0x14200  usbtsk        usbtask
 10880  448964      0      0  3     0x14200  usbatsk       usbatsk
 66170  108418      0      0  3     0x14200  bored         sensors
 86508  510965      0      0  3  0x40014200  acpi0         acpi0
 41740  521514      0      0  7  0x40014200                idle1
 42154  396729      0      0  2     0x14200                softnet
 65803  465644      0      0  3     0x14200  bored         systqmp
 33651   52515      0      0  3     0x14200  bored         systq
 44323  189892      0      0  2  0x40014200                softclock
 25032  280029      0      0  3  0x40014200                idle0
  4614  206064      0      0  3     0x14200  kmalloc       kmthread
     1   57612      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb{0}> x/x 0xd73c2d9c,10
0xd73c2d9c:     1ff118e7
0xd73c2da0:     7814c6e
0xd73c2da4:     efff1133
0xd73c2da8:     efff1133
0xd73c2dac:     efff1133
0xd73c2db0:     efff1133
0xd73c2db4:     efff1133
0xd73c2db8:     efff1133
0xd73c2dbc:     efff1133
0xd73c2dc0:     efff1133
0xd73c2dc4:     efff1133
0xd73c2dc8:     efff1133
0xd73c2dcc:     efff1133
0xd73c2dd0:     efff1133
0xd73c2dd4:     efff1133
0xd73c2dd8:     efff1133
ddb{0}> x/s version
version:        OpenBSD 6.3-beta (GENERIC.MP) #2: Thu Mar  8 21:02:29 CET 2018\
012    [hidden email]:/usr/src/sys/arch/i386/compile/GENERIC.MP\012

Reply | Threaded
Open this post in threaded view
|

Re: NFS socket use after free during reboot

Martin Pieuchot
On 08/03/18(Thu) 23:16, Alexander Bluhm wrote:
> Hi,
>
> When rebooting the NFS client while the NFS file system is actively
> used, the kernel crashes.  The socket at 0xd73c2d9c is filled with
> dead beef, so it is a use after free.  It is an i386 kernel built
> today.

There are multiple known issues with umounting a busy NFS client.
These issues were previously masked by the "remount read-only"
logic at shutdown.

> root@ot2:.../~# find /mount >/dev/null & sleep 5; reboot -q
> [1] 9698
> syncing disks... uvm_fault(0xd72afc7c, 0x1ff11000, 0, 1) -> e
> kernel: page fault trap, code=0
> Stopped at      sblock+0x12:    movl    0x4(%eax),%eax
> ddb{0}> trace
> sblock(d73c2d9c,d73c2df0,1) at sblock+0x12
> soreceive(d73c2d9c,0,f548d818,f548d884,0,f548d804,0) at soreceive+0x271
> nfs_receive(d7471f7c,f548d87c,f548d884) at nfs_receive+0xb1
> nfs_reply(d7471f7c) at nfs_reply+0x62
> nfs_request(d6d1f3c4,10,f548d970) at nfs_request+0x24d
> nfs_readdirrpc(d6d1f3c4,f548d9f8,d7499120,f548d9ec) at nfs_readdirrpc+0x1dc
> nfs_readdir(f548dab0) at nfs_readdir+0x227
> VOP_READDIR(d6d1f3c4,f548daf8,d7499120,f548daec) at VOP_READDIR+0x42
> sys_getdents(d71372dc,f548db68,f548db60) at sys_getdents+0x118
> syscall() at syscall+0x204
> --- syscall (number 0) ---

Your trace shows two things.  First of all the userland thread doing
getdents(2) is getting schedule after nfs_unmount() has freed the
socket.  Secondly it shows that such thread has no way to know that
the socket is no longer valid.

My previous attempt to fix this problem, my preventing all reconnect as
soon as nfs_unmount() has been called only moved the panic to a
different layer because NFS node don't have proper locking.

So here's a diff to add locking to NFS nodes.  I couldn't reproduce the
panic above with it.  So I'd be interested if you could try it.  Note
that I didn't do much tests in write mode, so I'd suggest exporting your
"/mount" as 'ro' in a first time.  Diskless setups are also probably
broken.

Index: nfs/nfs_node.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_node.c,v
retrieving revision 1.65
diff -u -p -r1.65 nfs_node.c
--- nfs/nfs_node.c 27 Sep 2016 01:37:38 -0000 1.65
+++ nfs/nfs_node.c 20 Mar 2018 12:31:40 -0000
@@ -58,8 +58,6 @@
 struct pool nfs_node_pool;
 extern int prtactive;
 
-struct rwlock nfs_hashlock = RWLOCK_INITIALIZER("nfshshlk");
-
 /* XXX */
 extern struct vops nfs_vops;
 
@@ -98,12 +96,10 @@ nfs_nget(struct mount *mnt, nfsfh_t *fh,
  nmp = VFSTONFS(mnt);
 
 loop:
- rw_enter_write(&nfs_hashlock);
  find.n_fhp = fh;
  find.n_fhsize = fhsize;
  np = RBT_FIND(nfs_nodetree, &nmp->nm_ntree, &find);
  if (np != NULL) {
- rw_exit_write(&nfs_hashlock);
  vp = NFSTOV(np);
  error = vget(vp, LK_EXCLUSIVE, p);
  if (error)
@@ -120,25 +116,28 @@ loop:
  * to see if this nfsnode has been added while we did not hold
  * the lock.
  */
- rw_exit_write(&nfs_hashlock);
  error = getnewvnode(VT_NFS, mnt, &nfs_vops, &nvp);
  /* note that we don't have this vnode set up completely yet */
- rw_enter_write(&nfs_hashlock);
  if (error) {
  *npp = NULL;
- rw_exit_write(&nfs_hashlock);
  return (error);
  }
  nvp->v_flag |= VLARVAL;
- np = RBT_FIND(nfs_nodetree, &nmp->nm_ntree, &find);
- if (np != NULL) {
+ np = pool_get(&nfs_node_pool, PR_WAITOK | PR_ZERO);
+ /*
+ * getnewvnode() and pool_get() can sleep, check for race.
+ */
+ if (RBT_FIND(nfs_nodetree, &nmp->nm_ntree, &find) != NULL) {
+ pool_put(&nfs_node_pool, np);
  vgone(nvp);
- rw_exit_write(&nfs_hashlock);
  goto loop;
  }
 
  vp = nvp;
- np = pool_get(&nfs_node_pool, PR_WAITOK | PR_ZERO);
+#ifdef VFSLCKDEBUG
+ vp->v_flag |= VLOCKSWORK;
+#endif
+ rrw_init_flags(&np->n_lock, "nfsnode", RWL_DUPOK | RWL_IS_VNODE);
  vp->v_data = np;
  /* we now have an nfsnode on this vnode */
  vp->v_flag &= ~VLARVAL;
@@ -159,10 +158,11 @@ loop:
  np->n_fhp = &np->n_fh;
  bcopy(fh, np->n_fhp, fhsize);
  np->n_fhsize = fhsize;
+ /* lock the nfsnode, then put it on the rbtree */
+ rrw_enter(&np->n_lock, RW_WRITE);
  np2 = RBT_INSERT(nfs_nodetree, &nmp->nm_ntree, np);
  KASSERT(np2 == NULL);
  np->n_accstamp = -1;
- rw_exit(&nfs_hashlock);
  *npp = np;
 
  return (0);
@@ -201,9 +201,10 @@ nfs_inactive(void *v)
  * Remove the silly file that was rename'd earlier
  */
  nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, curproc);
+ vn_lock(sp->s_dvp, LK_EXCLUSIVE | LK_RETRY, curproc);
  nfs_removeit(sp);
  crfree(sp->s_cred);
- vrele(sp->s_dvp);
+ vput(sp->s_dvp);
  free(sp, M_NFSREQ, sizeof(*sp));
  }
  np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT);
@@ -239,9 +240,7 @@ nfs_reclaim(void *v)
     ap->a_vp);
 #endif
  nmp = VFSTONFS(vp->v_mount);
- rw_enter_write(&nfs_hashlock);
  RBT_REMOVE(nfs_nodetree, &nmp->nm_ntree, np);
- rw_exit_write(&nfs_hashlock);
 
  if (np->n_rcred)
  crfree(np->n_rcred);
Index: nfs/nfs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_vfsops.c,v
retrieving revision 1.116
diff -u -p -r1.116 nfs_vfsops.c
--- nfs/nfs_vfsops.c 10 Feb 2018 05:24:23 -0000 1.116
+++ nfs/nfs_vfsops.c 20 Mar 2018 10:27:24 -0000
@@ -178,7 +178,7 @@ nfs_statfs(struct mount *mp, struct stat
  copy_statfs_info(sbp, mp);
  m_freem(info.nmi_mrep);
 nfsmout:
- vrele(vp);
+ vput(vp);
  crfree(cred);
  return (error);
 }
@@ -282,6 +282,7 @@ nfs_mountroot(void)
  if (nfs_boot_getfh(&nfs_diskless.nd_boot, "root", &nfs_diskless.nd_root, -1))
  panic("nfs_mountroot: root");
  mp = nfs_mount_diskless(&nfs_diskless.nd_root, "/", 0);
+ /* XXX NFSLOCK */
  nfs_root(mp, &rootvp);
  printf("root on %s\n", nfs_diskless.nd_root.ndm_host);
 
@@ -334,6 +335,7 @@ nfs_mountroot(void)
  error = nfs_boot_getfh(&nfs_diskless.nd_boot, "swap", &nfs_diskless.nd_swap, 5);
  if (!error) {
  mp = nfs_mount_diskless(&nfs_diskless.nd_swap, "/swap", 0);
+ /* XXX NFSLOCK */
  nfs_root(mp, &vp);
  vfs_unbusy(mp);
 
Index: nfs/nfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_vnops.c,v
retrieving revision 1.171
diff -u -p -r1.171 nfs_vnops.c
--- nfs/nfs_vnops.c 22 Feb 2017 11:42:46 -0000 1.171
+++ nfs/nfs_vnops.c 20 Mar 2018 12:50:58 -0000
@@ -88,7 +88,9 @@ int nfs_flush(struct vnode *, struct ucr
 int nfs_fsync(void *);
 int nfs_getattr(void *);
 int nfs_getreq(struct nfsrv_descript *, struct nfsd *, int);
+int nfs_islocked(void *);
 int nfs_link(void *);
+int nfs_lock(void *);
 int nfs_lookitup(struct vnode *, char *, int, struct ucred *, struct proc *,
  struct nfsnode **);
 int nfs_lookup(void *);
@@ -120,6 +122,7 @@ int nfs_sillyrename(struct vnode *, stru
  struct componentname *);
 int nfs_strategy(void *);
 int nfs_symlink(void *);
+int nfs_unlock(void *);
 
 void nfs_cache_enter(struct vnode *, struct vnode *, struct componentname *);
 
@@ -161,12 +164,12 @@ struct vops nfs_vops = {
  .vop_abortop = vop_generic_abortop,
  .vop_inactive = nfs_inactive,
  .vop_reclaim = nfs_reclaim,
- .vop_lock = vop_generic_lock, /* XXX: beck@ must fix this. */
- .vop_unlock = vop_generic_unlock,
+ .vop_lock = nfs_lock,
+ .vop_unlock = nfs_unlock,
  .vop_bmap = nfs_bmap,
  .vop_strategy = nfs_strategy,
  .vop_print = nfs_print,
- .vop_islocked = vop_generic_islocked,
+ .vop_islocked = nfs_islocked,
  .vop_pathconf = nfs_pathconf,
  .vop_advlock = nfs_advlock,
  .vop_bwrite = nfs_bwrite
@@ -769,6 +772,7 @@ nfs_lookup(void *v)
  flags = cnp->cn_flags;
 
  *vpp = NULLVP;
+ newvp = NULLVP;
  if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
     (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
  return (EROFS);
@@ -836,8 +840,10 @@ nfs_lookup(void *v)
  if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
  cnp->cn_flags |= SAVENAME;
  if ((!lockparent || !(flags & ISLASTCN)) &&
-     newvp != dvp)
+     newvp != dvp) {
  VOP_UNLOCK(dvp, p);
+ cnp->cn_flags |= PDIRUNLOCK;
+ }
  return (0);
  }
  cache_purge(newvp);
@@ -907,6 +913,9 @@ dorpc:
  */
 
  if (NFS_CMPFH(np, fhp, fhsize)) {
+ /*
+ * "." lookup
+ */
  vref(dvp);
  newvp = dvp;
  if (info.nmi_v3) {
@@ -915,6 +924,9 @@ dorpc:
  } else
  nfsm_loadattr(newvp, NULL);
  } else if (flags & ISDOTDOT) {
+ /*
+ * ".." lookup
+ */
  VOP_UNLOCK(dvp, p);
  cnp->cn_flags |= PDIRUNLOCK;
 
@@ -943,6 +955,9 @@ dorpc:
  }
 
  } else {
+ /*
+ * Other lookups.
+ */
  error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
  if (error) {
  m_freem(info.nmi_mrep);
@@ -1030,6 +1045,54 @@ nfs_readlink(void *v)
  return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
 }
 
+#ifdef DDB
+#include <ddb/db_output.h>
+#endif
+
+/*
+ * Lock an inode.
+ */
+int
+nfs_lock(void *v)
+{
+ struct vop_lock_args *ap = v;
+ struct vnode *vp = ap->a_vp;
+
+#if 0 //def DDB
+ db_stack_dump();
+#endif
+
+ return rrw_enter(&VTONFS(vp)->n_lock, ap->a_flags & LK_RWFLAGS);
+}
+
+/*
+ * Unlock an inode.
+ */
+int
+nfs_unlock(void *v)
+{
+ struct vop_unlock_args *ap = v;
+ struct vnode *vp = ap->a_vp;
+
+#if 0 //def DDB
+ db_stack_dump();
+#endif
+
+ rrw_exit(&VTONFS(vp)->n_lock);
+ return 0;
+}
+
+/*
+ * Check for a locked inode.
+ */
+int
+nfs_islocked(void *v)
+{
+ struct vop_islocked_args *ap = v;
+
+ return rrw_status(&VTONFS(ap->a_vp)->n_lock);
+}
+
 /*
  * Do a readlink rpc.
  * Called by nfs_doio() from below the buffer cache.
@@ -1318,10 +1381,6 @@ nfs_mknodrpc(struct vnode *dvp, struct v
  if (!error) {
  nfsm_mtofh(dvp, newvp, info.nmi_v3, gotvp);
  if (!gotvp) {
- if (newvp) {
- vrele(newvp);
- newvp = NULL;
- }
  error = nfs_lookitup(dvp, cnp->cn_nameptr,
     cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
  if (!error)
@@ -1335,7 +1394,7 @@ nfs_mknodrpc(struct vnode *dvp, struct v
 nfsmout:
  if (error) {
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  } else {
  if (cnp->cn_flags & MAKEENTRY)
  nfs_cache_enter(dvp, newvp, cnp);
@@ -1345,7 +1404,7 @@ nfsmout:
  VTONFS(dvp)->n_flag |= NMODIFIED;
  if (!wccflag)
  NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
- vrele(dvp);
+ vput(dvp);
  return (error);
 }
 
@@ -1362,7 +1421,7 @@ nfs_mknod(void *v)
 
  error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap);
  if (!error)
- vrele(newvp);
+ vput(newvp);
 
  VN_KNOTE(ap->a_dvp, NOTE_WRITE);
 
@@ -1430,10 +1489,6 @@ again:
  if (!error) {
  nfsm_mtofh(dvp, newvp, info.nmi_v3, gotvp);
  if (!gotvp) {
- if (newvp) {
- vrele(newvp);
- newvp = NULL;
- }
  error = nfs_lookitup(dvp, cnp->cn_nameptr,
     cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
  if (!error)
@@ -1451,7 +1506,7 @@ nfsmout:
  goto again;
  }
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  } else if (info.nmi_v3 && (fmode & O_EXCL))
  error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
  if (!error) {
@@ -1464,7 +1519,7 @@ nfsmout:
  if (!wccflag)
  NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
  VN_KNOTE(ap->a_dvp, NOTE_WRITE);
- vrele(dvp);
+ vput(dvp);
  return (error);
 }
 
@@ -1530,11 +1585,12 @@ nfs_remove(void *v)
  error = nfs_sillyrename(dvp, vp, cnp);
  pool_put(&namei_pool, cnp->cn_pnbuf);
  NFS_INVALIDATE_ATTRCACHE(np);
- vrele(dvp);
- vrele(vp);
-
  VN_KNOTE(vp, NOTE_DELETE);
  VN_KNOTE(dvp, NOTE_WRITE);
+ if (dvp == vp)
+ vrele(vp);
+ else
+ vput(vp);
 
  return (error);
 }
@@ -1545,9 +1601,9 @@ nfs_remove(void *v)
 int
 nfs_removeit(struct sillyrename *sp)
 {
+ KASSERT(VOP_ISLOCKED(sp->s_dvp));
  /*
  * Make sure that the directory vnode is still valid.
- * XXX we should lock sp->s_dvp here.
  *
  * NFS can potentially try to nuke a silly *after* the directory
  * has already been pushed out on a forced unmount. Since the silly
@@ -1628,7 +1684,7 @@ nfs_rename(void *v)
  if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename &&
     tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
  VN_KNOTE(tvp, NOTE_DELETE);
- vrele(tvp);
+ vput(tvp);
  tvp = NULL;
  }
 
@@ -1725,6 +1781,7 @@ nfs_link(void *v)
  struct vnode *vp = ap->a_vp;
  struct vnode *dvp = ap->a_dvp;
  struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
  struct nfsm_info info;
  u_int32_t *tl;
  int32_t t1;
@@ -1738,6 +1795,12 @@ nfs_link(void *v)
  vput(dvp);
  return (EXDEV);
  }
+ error = vn_lock(vp, LK_EXCLUSIVE, p);
+ if (error != 0) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
 
  /*
  * Push all writes to the server, so that the attribute cache
@@ -1771,6 +1834,7 @@ nfsmout:
 
  VN_KNOTE(vp, NOTE_LINK);
  VN_KNOTE(dvp, NOTE_WRITE);
+ VOP_UNLOCK(vp, p);
  vput(dvp);
  return (error);
 }
@@ -1827,13 +1891,13 @@ nfs_symlink(void *v)
 
 nfsmout:
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  pool_put(&namei_pool, cnp->cn_pnbuf);
  VTONFS(dvp)->n_flag |= NMODIFIED;
  if (!wccflag)
  NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
  VN_KNOTE(dvp, NOTE_WRITE);
- vrele(dvp);
+ vput(dvp);
  return (error);
 }
 
@@ -1904,7 +1968,7 @@ nfsmout:
  }
  if (error) {
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  } else {
  VN_KNOTE(dvp, NOTE_WRITE|NOTE_LINK);
  if (cnp->cn_flags & MAKEENTRY)
@@ -1912,7 +1976,7 @@ nfsmout:
  *ap->a_vpp = newvp;
  }
  pool_put(&namei_pool, cnp->cn_pnbuf);
- vrele(dvp);
+ vput(dvp);
  return (error);
 }
 
@@ -1936,7 +2000,7 @@ nfs_rmdir(void *v)
 
  if (dvp == vp) {
  vrele(dvp);
- vrele(dvp);
+ vput(dvp);
  pool_put(&namei_pool, cnp->cn_pnbuf);
  return (EINVAL);
  }
@@ -1964,8 +2028,8 @@ nfsmout:
  VN_KNOTE(vp, NOTE_DELETE);
 
  cache_purge(vp);
- vrele(vp);
- vrele(dvp);
+ vput(vp);
+ vput(dvp);
  /*
  * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
  */
@@ -2492,7 +2556,10 @@ nfs_readdirplusrpc(struct vnode *vp, str
  nfsm_adv(nfsm_rndup(i));
  }
  if (newvp != NULLVP) {
- vrele(newvp);
+ if (newvp == vp)
+ vrele(newvp);
+ else
+ vput(newvp);
  newvp = NULLVP;
  }
  nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
@@ -2533,8 +2600,12 @@ nfs_readdirplusrpc(struct vnode *vp, str
  }
 
 nfsmout:
- if (newvp != NULLVP)
- vrele(newvp);
+ if (newvp != NULLVP) {
+ if (newvp == vp)
+ vrele(newvp);
+ else
+ vput(newvp);
+ }
  return (error);
 }
 
@@ -2659,7 +2730,7 @@ nfs_lookitup(struct vnode *dvp, char *na
  nfsm_postop_attr(newvp, attrflag);
  if (!attrflag && *npp == NULL) {
  m_freem(info.nmi_mrep);
- vrele(newvp);
+ vput(newvp);
  return (ENOENT);
  }
  } else
@@ -2670,7 +2741,7 @@ nfsmout:
  if (npp && *npp == NULL) {
  if (error) {
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  } else
  *npp = np;
  }
Index: nfs/nfsnode.h
===================================================================
RCS file: /cvs/src/sys/nfs/nfsnode.h,v
retrieving revision 1.39
diff -u -p -r1.39 nfsnode.h
--- nfs/nfsnode.h 15 Dec 2009 15:53:48 -0000 1.39
+++ nfs/nfsnode.h 19 Mar 2018 14:13:17 -0000
@@ -43,7 +43,7 @@
 #include <nfs/nfs.h>
 #endif
 
-#include <sys/rwlock.h>
+#include <sys/lock.h>
 
 /*
  * Silly rename structure that hangs off the nfsnode until the name
@@ -79,6 +79,7 @@ struct nfsnode {
  nfsfh_t *n_fhp; /* NFS File Handle */
  struct vnode *n_vnode; /* associated vnode */
  struct lockf *n_lockf; /* Locking record of file */
+ struct rrwlock n_lock; /* NFSnode lock */
  int n_error; /* Save write error value */
  union {
  struct timespec nf_atim; /* Special file times */
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.99
diff -u -p -r1.99 uvm_vnode.c
--- uvm/uvm_vnode.c 8 Mar 2018 22:04:18 -0000 1.99
+++ uvm/uvm_vnode.c 20 Mar 2018 12:58:17 -0000
@@ -1105,6 +1105,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
  off_t file_offset;
  int waitf, result, mapinflags;
  size_t got, wanted;
+ int netlocked = 0;
 
  /* init values */
  waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
@@ -1174,18 +1175,24 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
  * Ideally, this kind of operation *should* work.
  */
  result = 0;
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
- result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL, curproc);
-
- if (result == 0) {
- int netlocked = (rw_status(&netlock) == RW_WRITE);
-
+ if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) {
  /*
  * This process may already have the NET_LOCK(), if we
  * faulted in copyin() or copyout() in the network stack.
  */
- if (netlocked)
+ if (rw_status(&netlock) == RW_WRITE) {
+ netlocked = 1;
+ NET_UNLOCK();
+ }
+
+ result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL, curproc);
+ }
+
+ if (result == 0) {
+ if (!netlocked && (rw_status(&netlock) == RW_WRITE)) {
+ netlocked = 1;
  NET_UNLOCK();
+ }
 
  /* NOTE: vnode now locked! */
  if (rw == UIO_READ)
@@ -1195,11 +1202,12 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
     (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
     curproc->p_ucred);
 
+ if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+ VOP_UNLOCK(vn, curproc);
+
  if (netlocked)
  NET_LOCK();
 
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
- VOP_UNLOCK(vn, curproc);
  }
 
  /* NOTE: vnode now unlocked (unless vnislocked) */

Reply | Threaded
Open this post in threaded view
|

Re: NFS socket use after free during reboot

Alexander Bluhm
On Tue, Mar 20, 2018 at 02:24:40PM +0100, Martin Pieuchot wrote:
> So here's a diff to add locking to NFS nodes.  I couldn't reproduce the
> panic above with it.  So I'd be interested if you could try it.  Note
> that I didn't do much tests in write mode, so I'd suggest exporting your
> "/mount" as 'ro' in a first time.  Diskless setups are also probably
> broken.

This diff fixes my reboot test case.  I was only using a read-only
mount when I reported the panic.

But now the /usr/src/regress/sys/ffs/nfs test hangs in "nfsnode".

bluhm

Reply | Threaded
Open this post in threaded view
|

Re: NFS socket use after free during reboot

Martin Pieuchot
On 20/03/18(Tue) 20:09, Alexander Bluhm wrote:

> On Tue, Mar 20, 2018 at 02:24:40PM +0100, Martin Pieuchot wrote:
> > So here's a diff to add locking to NFS nodes.  I couldn't reproduce the
> > panic above with it.  So I'd be interested if you could try it.  Note
> > that I didn't do much tests in write mode, so I'd suggest exporting your
> > "/mount" as 'ro' in a first time.  Diskless setups are also probably
> > broken.
>
> This diff fixes my reboot test case.  I was only using a read-only
> mount when I reported the panic.
>
> But now the /usr/src/regress/sys/ffs/nfs test hangs in "nfsnode".

Because I forgot to unlock the parent's vnode in nfs_remove(), diff below
fixes that.

Index: nfs/nfs_node.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_node.c,v
retrieving revision 1.65
diff -u -p -r1.65 nfs_node.c
--- nfs/nfs_node.c 27 Sep 2016 01:37:38 -0000 1.65
+++ nfs/nfs_node.c 20 Mar 2018 12:31:40 -0000
@@ -58,8 +58,6 @@
 struct pool nfs_node_pool;
 extern int prtactive;
 
-struct rwlock nfs_hashlock = RWLOCK_INITIALIZER("nfshshlk");
-
 /* XXX */
 extern struct vops nfs_vops;
 
@@ -98,12 +96,10 @@ nfs_nget(struct mount *mnt, nfsfh_t *fh,
  nmp = VFSTONFS(mnt);
 
 loop:
- rw_enter_write(&nfs_hashlock);
  find.n_fhp = fh;
  find.n_fhsize = fhsize;
  np = RBT_FIND(nfs_nodetree, &nmp->nm_ntree, &find);
  if (np != NULL) {
- rw_exit_write(&nfs_hashlock);
  vp = NFSTOV(np);
  error = vget(vp, LK_EXCLUSIVE, p);
  if (error)
@@ -120,25 +116,28 @@ loop:
  * to see if this nfsnode has been added while we did not hold
  * the lock.
  */
- rw_exit_write(&nfs_hashlock);
  error = getnewvnode(VT_NFS, mnt, &nfs_vops, &nvp);
  /* note that we don't have this vnode set up completely yet */
- rw_enter_write(&nfs_hashlock);
  if (error) {
  *npp = NULL;
- rw_exit_write(&nfs_hashlock);
  return (error);
  }
  nvp->v_flag |= VLARVAL;
- np = RBT_FIND(nfs_nodetree, &nmp->nm_ntree, &find);
- if (np != NULL) {
+ np = pool_get(&nfs_node_pool, PR_WAITOK | PR_ZERO);
+ /*
+ * getnewvnode() and pool_get() can sleep, check for race.
+ */
+ if (RBT_FIND(nfs_nodetree, &nmp->nm_ntree, &find) != NULL) {
+ pool_put(&nfs_node_pool, np);
  vgone(nvp);
- rw_exit_write(&nfs_hashlock);
  goto loop;
  }
 
  vp = nvp;
- np = pool_get(&nfs_node_pool, PR_WAITOK | PR_ZERO);
+#ifdef VFSLCKDEBUG
+ vp->v_flag |= VLOCKSWORK;
+#endif
+ rrw_init_flags(&np->n_lock, "nfsnode", RWL_DUPOK | RWL_IS_VNODE);
  vp->v_data = np;
  /* we now have an nfsnode on this vnode */
  vp->v_flag &= ~VLARVAL;
@@ -159,10 +158,11 @@ loop:
  np->n_fhp = &np->n_fh;
  bcopy(fh, np->n_fhp, fhsize);
  np->n_fhsize = fhsize;
+ /* lock the nfsnode, then put it on the rbtree */
+ rrw_enter(&np->n_lock, RW_WRITE);
  np2 = RBT_INSERT(nfs_nodetree, &nmp->nm_ntree, np);
  KASSERT(np2 == NULL);
  np->n_accstamp = -1;
- rw_exit(&nfs_hashlock);
  *npp = np;
 
  return (0);
@@ -201,9 +201,10 @@ nfs_inactive(void *v)
  * Remove the silly file that was rename'd earlier
  */
  nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, curproc);
+ vn_lock(sp->s_dvp, LK_EXCLUSIVE | LK_RETRY, curproc);
  nfs_removeit(sp);
  crfree(sp->s_cred);
- vrele(sp->s_dvp);
+ vput(sp->s_dvp);
  free(sp, M_NFSREQ, sizeof(*sp));
  }
  np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT);
@@ -239,9 +240,7 @@ nfs_reclaim(void *v)
     ap->a_vp);
 #endif
  nmp = VFSTONFS(vp->v_mount);
- rw_enter_write(&nfs_hashlock);
  RBT_REMOVE(nfs_nodetree, &nmp->nm_ntree, np);
- rw_exit_write(&nfs_hashlock);
 
  if (np->n_rcred)
  crfree(np->n_rcred);
Index: nfs/nfs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_vfsops.c,v
retrieving revision 1.116
diff -u -p -r1.116 nfs_vfsops.c
--- nfs/nfs_vfsops.c 10 Feb 2018 05:24:23 -0000 1.116
+++ nfs/nfs_vfsops.c 20 Mar 2018 10:27:24 -0000
@@ -178,7 +178,7 @@ nfs_statfs(struct mount *mp, struct stat
  copy_statfs_info(sbp, mp);
  m_freem(info.nmi_mrep);
 nfsmout:
- vrele(vp);
+ vput(vp);
  crfree(cred);
  return (error);
 }
@@ -282,6 +282,7 @@ nfs_mountroot(void)
  if (nfs_boot_getfh(&nfs_diskless.nd_boot, "root", &nfs_diskless.nd_root, -1))
  panic("nfs_mountroot: root");
  mp = nfs_mount_diskless(&nfs_diskless.nd_root, "/", 0);
+ /* XXX NFSLOCK */
  nfs_root(mp, &rootvp);
  printf("root on %s\n", nfs_diskless.nd_root.ndm_host);
 
@@ -334,6 +335,7 @@ nfs_mountroot(void)
  error = nfs_boot_getfh(&nfs_diskless.nd_boot, "swap", &nfs_diskless.nd_swap, 5);
  if (!error) {
  mp = nfs_mount_diskless(&nfs_diskless.nd_swap, "/swap", 0);
+ /* XXX NFSLOCK */
  nfs_root(mp, &vp);
  vfs_unbusy(mp);
 
Index: nfs/nfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_vnops.c,v
retrieving revision 1.171
diff -u -p -r1.171 nfs_vnops.c
--- nfs/nfs_vnops.c 22 Feb 2017 11:42:46 -0000 1.171
+++ nfs/nfs_vnops.c 26 Mar 2018 14:37:00 -0000
@@ -88,7 +88,9 @@ int nfs_flush(struct vnode *, struct ucr
 int nfs_fsync(void *);
 int nfs_getattr(void *);
 int nfs_getreq(struct nfsrv_descript *, struct nfsd *, int);
+int nfs_islocked(void *);
 int nfs_link(void *);
+int nfs_lock(void *);
 int nfs_lookitup(struct vnode *, char *, int, struct ucred *, struct proc *,
  struct nfsnode **);
 int nfs_lookup(void *);
@@ -120,6 +122,7 @@ int nfs_sillyrename(struct vnode *, stru
  struct componentname *);
 int nfs_strategy(void *);
 int nfs_symlink(void *);
+int nfs_unlock(void *);
 
 void nfs_cache_enter(struct vnode *, struct vnode *, struct componentname *);
 
@@ -161,12 +164,12 @@ struct vops nfs_vops = {
  .vop_abortop = vop_generic_abortop,
  .vop_inactive = nfs_inactive,
  .vop_reclaim = nfs_reclaim,
- .vop_lock = vop_generic_lock, /* XXX: beck@ must fix this. */
- .vop_unlock = vop_generic_unlock,
+ .vop_lock = nfs_lock,
+ .vop_unlock = nfs_unlock,
  .vop_bmap = nfs_bmap,
  .vop_strategy = nfs_strategy,
  .vop_print = nfs_print,
- .vop_islocked = vop_generic_islocked,
+ .vop_islocked = nfs_islocked,
  .vop_pathconf = nfs_pathconf,
  .vop_advlock = nfs_advlock,
  .vop_bwrite = nfs_bwrite
@@ -769,6 +772,7 @@ nfs_lookup(void *v)
  flags = cnp->cn_flags;
 
  *vpp = NULLVP;
+ newvp = NULLVP;
  if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
     (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
  return (EROFS);
@@ -836,8 +840,10 @@ nfs_lookup(void *v)
  if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
  cnp->cn_flags |= SAVENAME;
  if ((!lockparent || !(flags & ISLASTCN)) &&
-     newvp != dvp)
+     newvp != dvp) {
  VOP_UNLOCK(dvp, p);
+ cnp->cn_flags |= PDIRUNLOCK;
+ }
  return (0);
  }
  cache_purge(newvp);
@@ -907,6 +913,9 @@ dorpc:
  */
 
  if (NFS_CMPFH(np, fhp, fhsize)) {
+ /*
+ * "." lookup
+ */
  vref(dvp);
  newvp = dvp;
  if (info.nmi_v3) {
@@ -915,6 +924,9 @@ dorpc:
  } else
  nfsm_loadattr(newvp, NULL);
  } else if (flags & ISDOTDOT) {
+ /*
+ * ".." lookup
+ */
  VOP_UNLOCK(dvp, p);
  cnp->cn_flags |= PDIRUNLOCK;
 
@@ -943,6 +955,9 @@ dorpc:
  }
 
  } else {
+ /*
+ * Other lookups.
+ */
  error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
  if (error) {
  m_freem(info.nmi_mrep);
@@ -1030,6 +1045,54 @@ nfs_readlink(void *v)
  return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
 }
 
+#ifdef DDB
+#include <ddb/db_output.h>
+#endif
+
+/*
+ * Lock an inode.
+ */
+int
+nfs_lock(void *v)
+{
+ struct vop_lock_args *ap = v;
+ struct vnode *vp = ap->a_vp;
+
+#if 0 //def DDB
+ db_stack_dump();
+#endif
+
+ return rrw_enter(&VTONFS(vp)->n_lock, ap->a_flags & LK_RWFLAGS);
+}
+
+/*
+ * Unlock an inode.
+ */
+int
+nfs_unlock(void *v)
+{
+ struct vop_unlock_args *ap = v;
+ struct vnode *vp = ap->a_vp;
+
+#if 0 //def DDB
+ db_stack_dump();
+#endif
+
+ rrw_exit(&VTONFS(vp)->n_lock);
+ return 0;
+}
+
+/*
+ * Check for a locked inode.
+ */
+int
+nfs_islocked(void *v)
+{
+ struct vop_islocked_args *ap = v;
+
+ return rrw_status(&VTONFS(ap->a_vp)->n_lock);
+}
+
 /*
  * Do a readlink rpc.
  * Called by nfs_doio() from below the buffer cache.
@@ -1318,10 +1381,6 @@ nfs_mknodrpc(struct vnode *dvp, struct v
  if (!error) {
  nfsm_mtofh(dvp, newvp, info.nmi_v3, gotvp);
  if (!gotvp) {
- if (newvp) {
- vrele(newvp);
- newvp = NULL;
- }
  error = nfs_lookitup(dvp, cnp->cn_nameptr,
     cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
  if (!error)
@@ -1335,7 +1394,7 @@ nfs_mknodrpc(struct vnode *dvp, struct v
 nfsmout:
  if (error) {
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  } else {
  if (cnp->cn_flags & MAKEENTRY)
  nfs_cache_enter(dvp, newvp, cnp);
@@ -1345,7 +1404,7 @@ nfsmout:
  VTONFS(dvp)->n_flag |= NMODIFIED;
  if (!wccflag)
  NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
- vrele(dvp);
+ vput(dvp);
  return (error);
 }
 
@@ -1362,7 +1421,7 @@ nfs_mknod(void *v)
 
  error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap);
  if (!error)
- vrele(newvp);
+ vput(newvp);
 
  VN_KNOTE(ap->a_dvp, NOTE_WRITE);
 
@@ -1430,10 +1489,6 @@ again:
  if (!error) {
  nfsm_mtofh(dvp, newvp, info.nmi_v3, gotvp);
  if (!gotvp) {
- if (newvp) {
- vrele(newvp);
- newvp = NULL;
- }
  error = nfs_lookitup(dvp, cnp->cn_nameptr,
     cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
  if (!error)
@@ -1451,7 +1506,7 @@ nfsmout:
  goto again;
  }
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  } else if (info.nmi_v3 && (fmode & O_EXCL))
  error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
  if (!error) {
@@ -1464,7 +1519,7 @@ nfsmout:
  if (!wccflag)
  NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
  VN_KNOTE(ap->a_dvp, NOTE_WRITE);
- vrele(dvp);
+ vput(dvp);
  return (error);
 }
 
@@ -1530,12 +1585,13 @@ nfs_remove(void *v)
  error = nfs_sillyrename(dvp, vp, cnp);
  pool_put(&namei_pool, cnp->cn_pnbuf);
  NFS_INVALIDATE_ATTRCACHE(np);
- vrele(dvp);
- vrele(vp);
-
  VN_KNOTE(vp, NOTE_DELETE);
  VN_KNOTE(dvp, NOTE_WRITE);
-
+ if (dvp == vp)
+ vrele(vp);
+ else
+ vput(vp);
+ vput(dvp);
  return (error);
 }
 
@@ -1545,9 +1601,9 @@ nfs_remove(void *v)
 int
 nfs_removeit(struct sillyrename *sp)
 {
+ KASSERT(VOP_ISLOCKED(sp->s_dvp));
  /*
  * Make sure that the directory vnode is still valid.
- * XXX we should lock sp->s_dvp here.
  *
  * NFS can potentially try to nuke a silly *after* the directory
  * has already been pushed out on a forced unmount. Since the silly
@@ -1628,7 +1684,7 @@ nfs_rename(void *v)
  if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename &&
     tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
  VN_KNOTE(tvp, NOTE_DELETE);
- vrele(tvp);
+ vput(tvp);
  tvp = NULL;
  }
 
@@ -1725,6 +1781,7 @@ nfs_link(void *v)
  struct vnode *vp = ap->a_vp;
  struct vnode *dvp = ap->a_dvp;
  struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
  struct nfsm_info info;
  u_int32_t *tl;
  int32_t t1;
@@ -1738,6 +1795,12 @@ nfs_link(void *v)
  vput(dvp);
  return (EXDEV);
  }
+ error = vn_lock(vp, LK_EXCLUSIVE, p);
+ if (error != 0) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
 
  /*
  * Push all writes to the server, so that the attribute cache
@@ -1771,6 +1834,7 @@ nfsmout:
 
  VN_KNOTE(vp, NOTE_LINK);
  VN_KNOTE(dvp, NOTE_WRITE);
+ VOP_UNLOCK(vp, p);
  vput(dvp);
  return (error);
 }
@@ -1827,13 +1891,13 @@ nfs_symlink(void *v)
 
 nfsmout:
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  pool_put(&namei_pool, cnp->cn_pnbuf);
  VTONFS(dvp)->n_flag |= NMODIFIED;
  if (!wccflag)
  NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
  VN_KNOTE(dvp, NOTE_WRITE);
- vrele(dvp);
+ vput(dvp);
  return (error);
 }
 
@@ -1904,7 +1968,7 @@ nfsmout:
  }
  if (error) {
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  } else {
  VN_KNOTE(dvp, NOTE_WRITE|NOTE_LINK);
  if (cnp->cn_flags & MAKEENTRY)
@@ -1912,7 +1976,7 @@ nfsmout:
  *ap->a_vpp = newvp;
  }
  pool_put(&namei_pool, cnp->cn_pnbuf);
- vrele(dvp);
+ vput(dvp);
  return (error);
 }
 
@@ -1936,7 +2000,7 @@ nfs_rmdir(void *v)
 
  if (dvp == vp) {
  vrele(dvp);
- vrele(dvp);
+ vput(dvp);
  pool_put(&namei_pool, cnp->cn_pnbuf);
  return (EINVAL);
  }
@@ -1964,8 +2028,8 @@ nfsmout:
  VN_KNOTE(vp, NOTE_DELETE);
 
  cache_purge(vp);
- vrele(vp);
- vrele(dvp);
+ vput(vp);
+ vput(dvp);
  /*
  * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
  */
@@ -2492,7 +2556,10 @@ nfs_readdirplusrpc(struct vnode *vp, str
  nfsm_adv(nfsm_rndup(i));
  }
  if (newvp != NULLVP) {
- vrele(newvp);
+ if (newvp == vp)
+ vrele(newvp);
+ else
+ vput(newvp);
  newvp = NULLVP;
  }
  nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
@@ -2533,8 +2600,12 @@ nfs_readdirplusrpc(struct vnode *vp, str
  }
 
 nfsmout:
- if (newvp != NULLVP)
- vrele(newvp);
+ if (newvp != NULLVP) {
+ if (newvp == vp)
+ vrele(newvp);
+ else
+ vput(newvp);
+ }
  return (error);
 }
 
@@ -2659,7 +2730,7 @@ nfs_lookitup(struct vnode *dvp, char *na
  nfsm_postop_attr(newvp, attrflag);
  if (!attrflag && *npp == NULL) {
  m_freem(info.nmi_mrep);
- vrele(newvp);
+ vput(newvp);
  return (ENOENT);
  }
  } else
@@ -2670,7 +2741,7 @@ nfsmout:
  if (npp && *npp == NULL) {
  if (error) {
  if (newvp)
- vrele(newvp);
+ vput(newvp);
  } else
  *npp = np;
  }
Index: nfs/nfsnode.h
===================================================================
RCS file: /cvs/src/sys/nfs/nfsnode.h,v
retrieving revision 1.39
diff -u -p -r1.39 nfsnode.h
--- nfs/nfsnode.h 15 Dec 2009 15:53:48 -0000 1.39
+++ nfs/nfsnode.h 19 Mar 2018 14:13:17 -0000
@@ -43,7 +43,7 @@
 #include <nfs/nfs.h>
 #endif
 
-#include <sys/rwlock.h>
+#include <sys/lock.h>
 
 /*
  * Silly rename structure that hangs off the nfsnode until the name
@@ -79,6 +79,7 @@ struct nfsnode {
  nfsfh_t *n_fhp; /* NFS File Handle */
  struct vnode *n_vnode; /* associated vnode */
  struct lockf *n_lockf; /* Locking record of file */
+ struct rrwlock n_lock; /* NFSnode lock */
  int n_error; /* Save write error value */
  union {
  struct timespec nf_atim; /* Special file times */
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.99
diff -u -p -r1.99 uvm_vnode.c
--- uvm/uvm_vnode.c 8 Mar 2018 22:04:18 -0000 1.99
+++ uvm/uvm_vnode.c 20 Mar 2018 12:58:17 -0000
@@ -1105,6 +1105,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
  off_t file_offset;
  int waitf, result, mapinflags;
  size_t got, wanted;
+ int netlocked = 0;
 
  /* init values */
  waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
@@ -1174,18 +1175,24 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
  * Ideally, this kind of operation *should* work.
  */
  result = 0;
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
- result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL, curproc);
-
- if (result == 0) {
- int netlocked = (rw_status(&netlock) == RW_WRITE);
-
+ if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) {
  /*
  * This process may already have the NET_LOCK(), if we
  * faulted in copyin() or copyout() in the network stack.
  */
- if (netlocked)
+ if (rw_status(&netlock) == RW_WRITE) {
+ netlocked = 1;
+ NET_UNLOCK();
+ }
+
+ result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL, curproc);
+ }
+
+ if (result == 0) {
+ if (!netlocked && (rw_status(&netlock) == RW_WRITE)) {
+ netlocked = 1;
  NET_UNLOCK();
+ }
 
  /* NOTE: vnode now locked! */
  if (rw == UIO_READ)
@@ -1195,11 +1202,12 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
     (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
     curproc->p_ucred);
 
+ if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+ VOP_UNLOCK(vn, curproc);
+
  if (netlocked)
  NET_LOCK();
 
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
- VOP_UNLOCK(vn, curproc);
  }
 
  /* NOTE: vnode now unlocked (unless vnislocked) */