[PATCH] net: prevent if_clone_destroy from racing with rest of stack

classic Classic list List threaded Threaded
14 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] net: prevent if_clone_destroy from racing with rest of stack

Jason A. Donenfeld
You can crash a system by running something like:

    for i in 1 2 3; do while true; do ifconfig bridge0 create& ifconfig bridge0 destroy& done& done

This works with every type of interface I've tried. It appears that
if_clone_destroy and if_clone_create race with other ioctls, which
causes a variety of different UaFs or just general logic errors.

One common root cause appears to be that most ifioctl functions use
ifunit() to find an interface by name, which traverses if_list. Writes
to if_list are protected by a lock, but reads are apparently
unprotected. There's also the question of the life time of the object
returned from ifunit(). Most things that access &ifnet's if_list are
done without locking, and even if those accesses were to be locked, the
lock would be released before the object is no longer used, causing the
UaF in that case as well.

This patch fixes the issue by making if_clone_{create,destroy} exclusive
with all other ifioctls.
---
 sys/net/if.c | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git sys/net/if.c sys/net/if.c
index fb2f86f4a7c..9eedea83d32 100644
--- sys/net/if.c
+++ sys/net/if.c
@@ -246,6 +246,11 @@ struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
  */
 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
 
+/*
+ * Protect modifications to objects owned by ifnet's if_list
+ */
+struct rwlock iflist_obj_lock = RWLOCK_INITIALIZER("iflist_obj_lock");
+
 /*
  * Network interface utility routines.
  */
@@ -1905,7 +1910,7 @@ if_setrdomain(struct ifnet *ifp, int rdomain)
  * Interface ioctls.
  */
 int
-ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
+ifioctl_unlocked(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 {
  struct ifnet *ifp;
  struct ifreq *ifr = (struct ifreq *)data;
@@ -2432,6 +2437,35 @@ ifioctl_get(u_long cmd, caddr_t data)
  return (error);
 }
 
+int
+ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
+{
+ int ret;
+
+ switch (cmd) {
+ case SIOCIFCREATE:
+ case SIOCIFDESTROY:
+ rw_enter_write(&iflist_obj_lock);
+ break;
+ default:
+ rw_enter_read(&iflist_obj_lock);
+ }
+
+ ret = ifioctl_unlocked(so, cmd, data, p);
+
+ switch (cmd) {
+ case SIOCIFCREATE:
+ case SIOCIFDESTROY:
+ rw_exit_write(&iflist_obj_lock);
+ break;
+ default:
+ rw_exit_read(&iflist_obj_lock);
+ }
+
+ return (ret);
+}
+
+
 static int
 if_sffpage_check(const caddr_t data)
 {
--
2.27.0

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Martin Pieuchot
On 23/06/20(Tue) 01:00, Jason A. Donenfeld wrote:
> You can crash a system by running something like:
>
>     for i in 1 2 3; do while true; do ifconfig bridge0 create& ifconfig bridge0 destroy& done& done
>
> This works with every type of interface I've tried. It appears that
> if_clone_destroy and if_clone_create race with other ioctls, which
> causes a variety of different UaFs or just general logic errors.

Thanks for the report.  This is a long standing & known issue.
 
> One common root cause appears to be that most ifioctl functions use
> ifunit() to find an interface by name, which traverses if_list. Writes
> to if_list are protected by a lock, but reads are apparently
> unprotected. There's also the question of the life time of the object
> returned from ifunit(). Most things that access &ifnet's if_list are
> done without locking, and even if those accesses were to be locked, the
> lock would be released before the object is no longer used, causing the
> UaF in that case as well.

Any sleeping point between the first ifunit() and the end of `ifc_create'
or `ifc_destroy' respectively can lead to races.

> This patch fixes the issue by making if_clone_{create,destroy} exclusive
> with all other ifioctls.

Diff below achieves the same but moves the locking inside the if_clone*
functions such that consumers like tun(4), bridge(4) and switch(4) which
clone interfaces upon open(2) are also serialized.

I also added a NET_ASSERT_UNLOCKED() in both functions because the new
lock must be grabbed before the NET_LOCK() in order to allow ifc_create
and ifc_destroy to manipulate data structures protected by this lock.

Comments, ok?

Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.610
diff -u -p -r1.610 if.c
--- net/if.c 22 Jun 2020 09:45:13 -0000 1.610
+++ net/if.c 23 Jun 2020 10:25:41 -0000
@@ -224,6 +224,7 @@ void if_idxmap_remove(struct ifnet *);
 
 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
 
+struct rwlock if_clone_lock = RWLOCK_INITIALIZER("if_clone_lock");
 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
 int if_cloners_count;
 
@@ -1246,24 +1247,36 @@ if_clone_create(const char *name, int rd
  struct ifnet *ifp;
  int unit, ret;
 
- ifc = if_clone_lookup(name, &unit);
- if (ifc == NULL)
- return (EINVAL);
+ NET_ASSERT_UNLOCKED();
+ rw_enter_write(&if_clone_lock);
 
- if (ifunit(name) != NULL)
- return (EEXIST);
+ ifc = if_clone_lookup(name, &unit);
+ if (ifc == NULL) {
+ ret = EINVAL;
+ goto out;
+ }
+ if (ifunit(name) != NULL) {
+ ret = EEXIST;
+ goto out;
+ }
 
  ret = (*ifc->ifc_create)(ifc, unit);
+ if (ret != 0)
+ goto out;
 
- if (ret != 0 || (ifp = ifunit(name)) == NULL)
- return (ret);
+ ifp = ifunit(name);
+ if (ifp == NULL) {
+ ret = EAGAIN;
+ goto out;
+ }
 
  NET_LOCK();
  if_addgroup(ifp, ifc->ifc_name);
  if (rdomain != 0)
  if_setrdomain(ifp, rdomain);
  NET_UNLOCK();
-
+out:
+ rw_exit_write(&if_clone_lock);
  return (ret);
 }
 
@@ -1277,16 +1290,23 @@ if_clone_destroy(const char *name)
  struct ifnet *ifp;
  int ret;
 
- ifc = if_clone_lookup(name, NULL);
- if (ifc == NULL)
- return (EINVAL);
+ NET_ASSERT_UNLOCKED();
+ rw_enter_write(&if_clone_lock);
 
+ ifc = if_clone_lookup(name, NULL);
+ if (ifc == NULL) {
+ ret = EINVAL;
+ goto out;
+ }
  ifp = ifunit(name);
- if (ifp == NULL)
- return (ENXIO);
-
- if (ifc->ifc_destroy == NULL)
- return (EOPNOTSUPP);
+ if (ifp == NULL) {
+ ret = ENXIO;
+ goto out;
+ }
+ if (ifc->ifc_destroy == NULL) {
+ ret = EOPNOTSUPP;
+ goto out;
+ }
 
  NET_LOCK();
  if (ifp->if_flags & IFF_UP) {
@@ -1297,7 +1317,8 @@ if_clone_destroy(const char *name)
  }
  NET_UNLOCK();
  ret = (*ifc->ifc_destroy)(ifp);
-
+out:
+ rw_exit_write(&if_clone_lock);
  return (ret);
 }
 

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Jason A. Donenfeld
On 6/23/20, Martin Pieuchot <[hidden email]> wrote:

> On 23/06/20(Tue) 01:00, Jason A. Donenfeld wrote:
>> You can crash a system by running something like:
>>
>>     for i in 1 2 3; do while true; do ifconfig bridge0 create& ifconfig
>> bridge0 destroy& done& done
>>
>> This works with every type of interface I've tried. It appears that
>> if_clone_destroy and if_clone_create race with other ioctls, which
>> causes a variety of different UaFs or just general logic errors.
>
> Thanks for the report.  This is a long standing & known issue.
>
>> One common root cause appears to be that most ifioctl functions use
>> ifunit() to find an interface by name, which traverses if_list. Writes
>> to if_list are protected by a lock, but reads are apparently
>> unprotected. There's also the question of the life time of the object
>> returned from ifunit(). Most things that access &ifnet's if_list are
>> done without locking, and even if those accesses were to be locked, the
>> lock would be released before the object is no longer used, causing the
>> UaF in that case as well.
>
> Any sleeping point between the first ifunit() and the end of `ifc_create'
> or `ifc_destroy' respectively can lead to races.
>
>> This patch fixes the issue by making if_clone_{create,destroy} exclusive
>> with all other ifioctls.
>
> Diff below achieves the same but moves the locking inside the if_clone*
> functions such that consumers like tun(4), bridge(4) and switch(4) which
> clone interfaces upon open(2) are also serialized.
>
> I also added a NET_ASSERT_UNLOCKED() in both functions because the new
> lock must be grabbed before the NET_LOCK() in order to allow ifc_create
> and ifc_destroy to manipulate data structures protected by this lock.
>
> Comments, ok?

Not okay. This is the first thing I tried, and it still races with
ifioctl_get, causing UaF crashes. It's harder to trigger, but still
possible after a few minutes of races. This structure here needs to be
a read/write lock, which is what my original patch provides. (I guess
I forgot to add the "ok?" epilogue to it.)


>
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.610
> diff -u -p -r1.610 if.c
> --- net/if.c 22 Jun 2020 09:45:13 -0000 1.610
> +++ net/if.c 23 Jun 2020 10:25:41 -0000
> @@ -224,6 +224,7 @@ void if_idxmap_remove(struct ifnet *);
>
>  TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
>
> +struct rwlock if_clone_lock = RWLOCK_INITIALIZER("if_clone_lock");
>  LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
>  int if_cloners_count;
>
> @@ -1246,24 +1247,36 @@ if_clone_create(const char *name, int rd
>   struct ifnet *ifp;
>   int unit, ret;
>
> - ifc = if_clone_lookup(name, &unit);
> - if (ifc == NULL)
> - return (EINVAL);
> + NET_ASSERT_UNLOCKED();
> + rw_enter_write(&if_clone_lock);
>
> - if (ifunit(name) != NULL)
> - return (EEXIST);
> + ifc = if_clone_lookup(name, &unit);
> + if (ifc == NULL) {
> + ret = EINVAL;
> + goto out;
> + }
> + if (ifunit(name) != NULL) {
> + ret = EEXIST;
> + goto out;
> + }
>
>   ret = (*ifc->ifc_create)(ifc, unit);
> + if (ret != 0)
> + goto out;
>
> - if (ret != 0 || (ifp = ifunit(name)) == NULL)
> - return (ret);
> + ifp = ifunit(name);
> + if (ifp == NULL) {
> + ret = EAGAIN;
> + goto out;
> + }
>
>   NET_LOCK();
>   if_addgroup(ifp, ifc->ifc_name);
>   if (rdomain != 0)
>   if_setrdomain(ifp, rdomain);
>   NET_UNLOCK();
> -
> +out:
> + rw_exit_write(&if_clone_lock);
>   return (ret);
>  }
>
> @@ -1277,16 +1290,23 @@ if_clone_destroy(const char *name)
>   struct ifnet *ifp;
>   int ret;
>
> - ifc = if_clone_lookup(name, NULL);
> - if (ifc == NULL)
> - return (EINVAL);
> + NET_ASSERT_UNLOCKED();
> + rw_enter_write(&if_clone_lock);
>
> + ifc = if_clone_lookup(name, NULL);
> + if (ifc == NULL) {
> + ret = EINVAL;
> + goto out;
> + }
>   ifp = ifunit(name);
> - if (ifp == NULL)
> - return (ENXIO);
> -
> - if (ifc->ifc_destroy == NULL)
> - return (EOPNOTSUPP);
> + if (ifp == NULL) {
> + ret = ENXIO;
> + goto out;
> + }
> + if (ifc->ifc_destroy == NULL) {
> + ret = EOPNOTSUPP;
> + goto out;
> + }
>
>   NET_LOCK();
>   if (ifp->if_flags & IFF_UP) {
> @@ -1297,7 +1317,8 @@ if_clone_destroy(const char *name)
>   }
>   NET_UNLOCK();
>   ret = (*ifc->ifc_destroy)(ifp);
> -
> +out:
> + rw_exit_write(&if_clone_lock);
>   return (ret);
>  }
>
>

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Martin Pieuchot
On 23/06/20(Tue) 04:53, Jason A. Donenfeld wrote:

> On 6/23/20, Martin Pieuchot <[hidden email]> wrote:
> > On 23/06/20(Tue) 01:00, Jason A. Donenfeld wrote:
> >> You can crash a system by running something like:
> >>
> >>     for i in 1 2 3; do while true; do ifconfig bridge0 create& ifconfig
> >> bridge0 destroy& done& done
> >>
> >> This works with every type of interface I've tried. It appears that
> >> if_clone_destroy and if_clone_create race with other ioctls, which
> >> causes a variety of different UaFs or just general logic errors.
> >
> > Thanks for the report.  This is a long standing & known issue.
> >
> >> One common root cause appears to be that most ifioctl functions use
> >> ifunit() to find an interface by name, which traverses if_list. Writes
> >> to if_list are protected by a lock, but reads are apparently
> >> unprotected. There's also the question of the life time of the object
> >> returned from ifunit(). Most things that access &ifnet's if_list are
> >> done without locking, and even if those accesses were to be locked, the
> >> lock would be released before the object is no longer used, causing the
> >> UaF in that case as well.
> >
> > Any sleeping point between the first ifunit() and the end of `ifc_create'
> > or `ifc_destroy' respectively can lead to races.
> >
> >> This patch fixes the issue by making if_clone_{create,destroy} exclusive
> >> with all other ifioctls.
> >
> > Diff below achieves the same but moves the locking inside the if_clone*
> > functions such that consumers like tun(4), bridge(4) and switch(4) which
> > clone interfaces upon open(2) are also serialized.
> >
> > I also added a NET_ASSERT_UNLOCKED() in both functions because the new
> > lock must be grabbed before the NET_LOCK() in order to allow ifc_create
> > and ifc_destroy to manipulate data structures protected by this lock.
> >
> > Comments, ok?
>
> Not okay. This is the first thing I tried, and it still races with
> ifioctl_get, causing UaF crashes. It's harder to trigger, but still
> possible after a few minutes of races. This structure here needs to be
> a read/write lock, which is what my original patch provides. (I guess
> I forgot to add the "ok?" epilogue to it.)

I'd argue this is a related problem but a different one.  The diff I
sent serializes cloning/destroying pseudo-interfaces.  It has value on
its own because *all* if_clone_*() operations are now serialized.

Now you correctly points out that it doesn't fix all the races in the
ioctl path.  That's a fact.  However without the informations of the
crashes it is hard to reason about the uncounted reference returned by
ifunit().

Taking a rwlock around all ioctl(2) can have effects that are hard to
debug.

> > Index: net/if.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if.c,v
> > retrieving revision 1.610
> > diff -u -p -r1.610 if.c
> > --- net/if.c 22 Jun 2020 09:45:13 -0000 1.610
> > +++ net/if.c 23 Jun 2020 10:25:41 -0000
> > @@ -224,6 +224,7 @@ void if_idxmap_remove(struct ifnet *);
> >
> >  TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
> >
> > +struct rwlock if_clone_lock = RWLOCK_INITIALIZER("if_clone_lock");
> >  LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
> >  int if_cloners_count;
> >
> > @@ -1246,24 +1247,36 @@ if_clone_create(const char *name, int rd
> >   struct ifnet *ifp;
> >   int unit, ret;
> >
> > - ifc = if_clone_lookup(name, &unit);
> > - if (ifc == NULL)
> > - return (EINVAL);
> > + NET_ASSERT_UNLOCKED();
> > + rw_enter_write(&if_clone_lock);
> >
> > - if (ifunit(name) != NULL)
> > - return (EEXIST);
> > + ifc = if_clone_lookup(name, &unit);
> > + if (ifc == NULL) {
> > + ret = EINVAL;
> > + goto out;
> > + }
> > + if (ifunit(name) != NULL) {
> > + ret = EEXIST;
> > + goto out;
> > + }
> >
> >   ret = (*ifc->ifc_create)(ifc, unit);
> > + if (ret != 0)
> > + goto out;
> >
> > - if (ret != 0 || (ifp = ifunit(name)) == NULL)
> > - return (ret);
> > + ifp = ifunit(name);
> > + if (ifp == NULL) {
> > + ret = EAGAIN;
> > + goto out;
> > + }
> >
> >   NET_LOCK();
> >   if_addgroup(ifp, ifc->ifc_name);
> >   if (rdomain != 0)
> >   if_setrdomain(ifp, rdomain);
> >   NET_UNLOCK();
> > -
> > +out:
> > + rw_exit_write(&if_clone_lock);
> >   return (ret);
> >  }
> >
> > @@ -1277,16 +1290,23 @@ if_clone_destroy(const char *name)
> >   struct ifnet *ifp;
> >   int ret;
> >
> > - ifc = if_clone_lookup(name, NULL);
> > - if (ifc == NULL)
> > - return (EINVAL);
> > + NET_ASSERT_UNLOCKED();
> > + rw_enter_write(&if_clone_lock);
> >
> > + ifc = if_clone_lookup(name, NULL);
> > + if (ifc == NULL) {
> > + ret = EINVAL;
> > + goto out;
> > + }
> >   ifp = ifunit(name);
> > - if (ifp == NULL)
> > - return (ENXIO);
> > -
> > - if (ifc->ifc_destroy == NULL)
> > - return (EOPNOTSUPP);
> > + if (ifp == NULL) {
> > + ret = ENXIO;
> > + goto out;
> > + }
> > + if (ifc->ifc_destroy == NULL) {
> > + ret = EOPNOTSUPP;
> > + goto out;
> > + }
> >
> >   NET_LOCK();
> >   if (ifp->if_flags & IFF_UP) {
> > @@ -1297,7 +1317,8 @@ if_clone_destroy(const char *name)
> >   }
> >   NET_UNLOCK();
> >   ret = (*ifc->ifc_destroy)(ifp);
> > -
> > +out:
> > + rw_exit_write(&if_clone_lock);
> >   return (ret);
> >  }
> >
> >
>

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Jason A. Donenfeld
On Tue, Jun 23, 2020 at 8:21 AM Martin Pieuchot <[hidden email]> wrote:

> I'd argue this is a related problem but a different one.  The diff I
> sent serializes cloning/destroying pseudo-interfaces.  It has value on
> its own because *all* if_clone_*() operations are now serialized.
>
> Now you correctly points out that it doesn't fix all the races in the
> ioctl path.  That's a fact.  However without the informations of the
> crashes it is hard to reason about the uncounted reference returned by
> ifunit().
>
> Taking a rwlock around all ioctl(2) can have effects that are hard to
> debug.

We're talking about the same resource and lookup structure, so
generally it makes sense to protect that the same way, right? Adding
and removing ifps, and adding and them and removing them from the list
of ifps, all need to be synchronized with the read-only usage of those
ifps. The other way to fix it would be avoiding a critical section
entirely by incrementing a refcount during the if_list lookup.

Either way, it sounds like the big problem you're pointing out with my
patch is that you fear some of those ioctls need to be callable from
contexts that cannot sleep, which means using an rwlock is wrong. It
doesn't look like the mutex spinlock has a r/w variant though. Or am I
missing that?

Jason

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Martin Pieuchot
On 23/06/20(Tue) 17:21, Jason A. Donenfeld wrote:

> On Tue, Jun 23, 2020 at 8:21 AM Martin Pieuchot <[hidden email]> wrote:
> > I'd argue this is a related problem but a different one.  The diff I
> > sent serializes cloning/destroying pseudo-interfaces.  It has value on
> > its own because *all* if_clone_*() operations are now serialized.
> >
> > Now you correctly points out that it doesn't fix all the races in the
> > ioctl path.  That's a fact.  However without the informations of the
> > crashes it is hard to reason about the uncounted reference returned by
> > ifunit().
> >
> > Taking a rwlock around all ioctl(2) can have effects that are hard to
> > debug.
>
> We're talking about the same resource and lookup structure, so
> generally it makes sense to protect that the same way, right? Adding
> and removing ifps, and adding and them and removing them from the list
> of ifps, all need to be synchronized with the read-only usage of those
> ifps. The other way to fix it would be avoiding a critical section
> entirely by incrementing a refcount during the if_list lookup.

Yes, that might be a better way.  If I understood your original mail the
issue is related to ifunit(), right?  ifunit() is not used in packet-
processing contexts, that's why we did not protect it by the NET_LOCK().

I'm not sure if all the ifunit() usages are necessary, many of them are
certainly exposing races with attach/destroy.

> Either way, it sounds like the big problem you're pointing out with my
> patch is that you fear some of those ioctls need to be callable from
> contexts that cannot sleep, which means using an rwlock is wrong. It
> doesn't look like the mutex spinlock has a r/w variant though. Or am I
> missing that?

Taking a rwlock for all ioctl(2) has a big impact.  The 'default' case
of the switch in ifioctl() goes into other subsystems and per-ifp ioctl
handler.

Many drivers sleep in their ioctl(2) handler, most USB and wireless one
to name a few.  Past experience showed that holding a rwlock around all
the handlers, led to (temporarily) "deadlock".  This is why vio(4) mailbox
sleep has been turned into a rwsleep releasing the `netlock'.

One can argue that such deadlocks happen because the scope of the lock is
huge.  This is easy to understand with the `netlock' and questionable,
at the time of the diff, with the proposed `if_list_lock'.  But saying
so would miss the point: throwing a lock around a huge part of code to
make symptoms disappear is a big hammer.

If the problem we're trying to fix is the reference counting of ifunit()
then I'd suggest to fix that and only that in all the tree.

If what we're after is serializing clone/destroy then let's do that.

The diff proposed did not dealt with all usages of any of the two
scenario.  I'd be glad to see the whole kernel has been considered and
the scope of any new lock is as small as possible.

Obviously I've been trying to reduce the scope of locks during years, so
I'm biased ;)

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Jason A. Donenfeld
Hi Martin,

On Wed, Jun 24, 2020 at 4:02 AM Martin Pieuchot <[hidden email]> wrote:
> Yes, that might be a better way.  If I understood your original mail the
> issue is related to ifunit(), right?  ifunit() is not used in packet-
> processing contexts, that's why we did not protect it by the NET_LOCK().
>
> I'm not sure if all the ifunit() usages are necessary, many of them are
> certainly exposing races with attach/destroy.

No, not _just_ ifunit, but ifunit is one of the places where this is
hit. But just one.

> Taking a rwlock for all ioctl(2) has a big impact.  The 'default' case
> of the switch in ifioctl() goes into other subsystems and per-ifp ioctl
> handler.
>
> Many drivers sleep in their ioctl(2) handler, most USB and wireless one
> to name a few.  Past experience showed that holding a rwlock around all
> the handlers, led to (temporarily) "deadlock".  This is why vio(4) mailbox
> sleep has been turned into a rwsleep releasing the `netlock'.
>
> One can argue that such deadlocks happen because the scope of the lock is
> huge.  This is easy to understand with the `netlock' and questionable,
> at the time of the diff, with the proposed `if_list_lock'.  But saying
> so would miss the point: throwing a lock around a huge part of code to
> make symptoms disappear is a big hammer.

Oh, that's your concern. I understand what you're concerned with now.
However, I think that in light of that, you've misunderstood the
original patch, and I'm now more convinced that the original patch is
the correct route.

The original patchset:
a. Uses an exclusive lock for clone/destroy.
b. Uses a shared lock for all other ioctls.

This means that all ioctls except clone/destroy can run without
blocking each other. So, there's no deadlock issues, and no speed
issues, and no lack of coarseness of locking. What this patch set does
add is:
1. Other ioctls are not permitted to run at the same time as clone/destroy.
2. Clone and destroy are not allowed to run at the same time as each other.
However:
3. Other ioctls ARE allowed to run at the same time as other ioctls,
so no blocking or deadlock issues.

Given the object lifetime and lookup structure design, these seem to
be the optimal set of circumstances.

Jason

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Vitaliy Makkoveev
In reply to this post by Jason A. Donenfeld
> On 23 Jun 2020, at 10:00, Jason A. Donenfeld <[hidden email]> wrote:
>
> You can crash a system by running something like:
>
>    for i in 1 2 3; do while true; do ifconfig bridge0 create& ifconfig bridge0 destroy& done& done
>
> This works with every type of interface I've tried. It appears that
> if_clone_destroy and if_clone_create race with other ioctls, which
> causes a variety of different UaFs or just general logic errors.
>
> One common root cause appears to be that most ifioctl functions use
> ifunit() to find an interface by name, which traverses if_list. Writes
> to if_list are protected by a lock, but reads are apparently
> unprotected. There's also the question of the life time of the object
> returned from ifunit(). Most things that access &ifnet's if_list are
> done without locking, and even if those accesses were to be locked, the
> lock would be released before the object is no longer used, causing the
> UaF in that case as well.
>
> This patch fixes the issue by making if_clone_{create,destroy} exclusive
> with all other ifioctls.
> ---
> sys/net/if.c | 36 +++++++++++++++++++++++++++++++++++-
> 1 file changed, 35 insertions(+), 1 deletion(-)
>
> diff --git sys/net/if.c sys/net/if.c
> index fb2f86f4a7c..9eedea83d32 100644
> --- sys/net/if.c
> +++ sys/net/if.c
> @@ -246,6 +246,11 @@ struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
>  */
> struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
>
> +/*
> + * Protect modifications to objects owned by ifnet's if_list
> + */
> +struct rwlock iflist_obj_lock = RWLOCK_INITIALIZER("iflist_obj_lock");
> +
> /*
>  * Network interface utility routines.
>  */
> @@ -1905,7 +1910,7 @@ if_setrdomain(struct ifnet *ifp, int rdomain)
>  * Interface ioctls.
>  */
> int
> -ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
> +ifioctl_unlocked(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
> {
> struct ifnet *ifp;
> struct ifreq *ifr = (struct ifreq *)data;
> @@ -2432,6 +2437,35 @@ ifioctl_get(u_long cmd, caddr_t data)
> return (error);
> }
>
> +int
> +ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
> +{
> + int ret;
> +
> + switch (cmd) {
> + case SIOCIFCREATE:
> + case SIOCIFDESTROY:
> + rw_enter_write(&iflist_obj_lock);
> + break;
> + default:
> + rw_enter_read(&iflist_obj_lock);
> + }
> +

You call ifioctl_unlocked() while you holding lock. I guess it’s should be
named ifioctl_locked().

> + ret = ifioctl_unlocked(so, cmd, data, p);
> +
> + switch (cmd) {
> + case SIOCIFCREATE:
> + case SIOCIFDESTROY:
> + rw_exit_write(&iflist_obj_lock);
> + break;
> + default:
> + rw_exit_read(&iflist_obj_lock);
> + }
> +
> + return (ret);
> +}
> +
> +
> static int
> if_sffpage_check(const caddr_t data)
> {
> --
> 2.27.0
>

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Martin Pieuchot
In reply to this post by Jason A. Donenfeld
On 24/06/20(Wed) 19:54, Jason A. Donenfeld wrote:

> On Wed, Jun 24, 2020 at 4:02 AM Martin Pieuchot <[hidden email]> wrote:
> > Yes, that might be a better way.  If I understood your original mail the
> > issue is related to ifunit(), right?  ifunit() is not used in packet-
> > processing contexts, that's why we did not protect it by the NET_LOCK().
> >
> > I'm not sure if all the ifunit() usages are necessary, many of them are
> > certainly exposing races with attach/destroy.
>
> No, not _just_ ifunit, but ifunit is one of the places where this is
> hit. But just one.

Which ones then?  I'd be delighted if you could share those facts.

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Vitaliy Makkoveev
In reply to this post by Jason A. Donenfeld
On Tue, Jun 23, 2020 at 01:00:06AM -0600, Jason A. Donenfeld wrote:

> You can crash a system by running something like:
>
>     for i in 1 2 3; do while true; do ifconfig bridge0 create& ifconfig bridge0 destroy& done& done
>
> This works with every type of interface I've tried. It appears that
> if_clone_destroy and if_clone_create race with other ioctls, which
> causes a variety of different UaFs or just general logic errors.
>
> One common root cause appears to be that most ifioctl functions use
> ifunit() to find an interface by name, which traverses if_list. Writes
> to if_list are protected by a lock, but reads are apparently
> unprotected. There's also the question of the life time of the object
> returned from ifunit(). Most things that access &ifnet's if_list are
> done without locking, and even if those accesses were to be locked, the
> lock would be released before the object is no longer used, causing the
> UaF in that case as well.
>
> This patch fixes the issue by making if_clone_{create,destroy} exclusive
> with all other ifioctls.
> ---
>  sys/net/if.c | 36 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 35 insertions(+), 1 deletion(-)
>
> diff --git sys/net/if.c sys/net/if.c
> index fb2f86f4a7c..9eedea83d32 100644
> --- sys/net/if.c
> +++ sys/net/if.c
> @@ -246,6 +246,11 @@ struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
>   */
>  struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
>  
> +/*
> + * Protect modifications to objects owned by ifnet's if_list
> + */
> +struct rwlock iflist_obj_lock = RWLOCK_INITIALIZER("iflist_obj_lock");
> +
>  /*
>   * Network interface utility routines.
>   */
> @@ -1905,7 +1910,7 @@ if_setrdomain(struct ifnet *ifp, int rdomain)
>   * Interface ioctls.
>   */
>  int
> -ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
> +ifioctl_unlocked(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
>  {
>   struct ifnet *ifp;
>   struct ifreq *ifr = (struct ifreq *)data;
> @@ -2432,6 +2437,35 @@ ifioctl_get(u_long cmd, caddr_t data)
>   return (error);
>  }
>  
> +int
> +ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
> +{
> + int ret;
> +
> + switch (cmd) {
> + case SIOCIFCREATE:
> + case SIOCIFDESTROY:
> + rw_enter_write(&iflist_obj_lock);
> + break;
> + default:
> + rw_enter_read(&iflist_obj_lock);
> + }
> +
> + ret = ifioctl_unlocked(so, cmd, data, p);
> +
> + switch (cmd) {
> + case SIOCIFCREATE:
> + case SIOCIFDESTROY:
> + rw_exit_write(&iflist_obj_lock);
> + break;
> + default:
> + rw_exit_read(&iflist_obj_lock);
> + }
> +
> + return (ret);
> +}
> +
> +
>  static int
>  if_sffpage_check(const caddr_t data)
>  {
> --
> 2.27.0
>

As mpi@ pointed you can sleep witchin ifioctl(). In most cases you lock
`iflist_obj_lock' for read and you didn't catch deadlock. But you also
can sleep witchin if_clone_create() and if_clone_create() while you lock
`iflist_obj_lock' for write.

As I see the races are:

1. We sleep in if_clone_create() and we don't mark allocated `unit' as
busy. So we can create multiple `ifp's with identical names and break
ifunit().

2. We sleep in if_clone_destroy() and we don't mark `ifp' as dying. So
we can grab this `ifp' by concurent thread.

3. We don't protect `ifp' being used in other cases so we can destroy
it.

Diff below fixes this issues. It's not for commit and I _know_ it's very
ugly. I just want to show the direction to fix this issus as I see it
and it's quickest way.

1. if_clone_create(). We reserve allocated `unit' unit so we can't do
multiple insertion of `ifp' with idential names.

2. if_clone_destroy(). We mark `ifp' as dying. Also we release `unit'
after we do `ifc_destroy' We can't destroy `ifp' more than once.

3. ifunit() will not receive `ifp' marked as dying.

4. We bump `ifp' ref counter in other cases to be sure if_detach() will
wait all concurent threads.

5. I disabled if_deactivate(ifp); in if_bridge. if_detach() will do it
so there is no reason to do it twice. I will do special diff for this.

I have 12:40 now. I run command below  since 10:00 and my system is
still stable.

comments?

---- cut begin ----
for i in 1 2 3; do while true; do ifconfig bridge0 create& \
        ifconfig bridge0 destroy& done& done
---- cut end ----


Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.610
diff -u -p -r1.610 if.c
--- sys/net/if.c 22 Jun 2020 09:45:13 -0000 1.610
+++ sys/net/if.c 25 Jun 2020 09:30:46 -0000
@@ -1236,6 +1236,18 @@ if_isconnected(const struct ifnet *ifp0,
  return (connected);
 }
 
+/* XXX: reserve unit */
+struct if_clone_unit {
+ LIST_ENTRY(if_clone_unit) ifcu_list;
+ struct if_clone *ifcu_ifc;
+ int ifcu_unit;
+};
+
+LIST_HEAD(, if_clone_unit) if_clone_units =
+ LIST_HEAD_INITIALIZER(if_clone_units);
+
+/* XXX: reserve unit */
+
 /*
  * Create a clone network interface.
  */
@@ -1246,6 +1258,8 @@ if_clone_create(const char *name, int rd
  struct ifnet *ifp;
  int unit, ret;
 
+ struct if_clone_unit *ifcu, *tifcu;
+
  ifc = if_clone_lookup(name, &unit);
  if (ifc == NULL)
  return (EINVAL);
@@ -1253,10 +1267,28 @@ if_clone_create(const char *name, int rd
  if (ifunit(name) != NULL)
  return (EEXIST);
 
+ /* XXX: reserve unit */
+ ifcu=malloc(sizeof(*ifcu), M_TEMP, M_WAITOK | M_ZERO);
+
+ LIST_FOREACH(tifcu, &if_clone_units, ifcu_list) {
+ if (tifcu->ifcu_ifc == ifc && tifcu->ifcu_unit == unit) {
+ free(ifcu, M_TEMP, 0);
+ return (EEXIST);
+ }
+ }
+ ifcu->ifcu_ifc = ifc;
+ ifcu->ifcu_unit = unit;
+ LIST_INSERT_HEAD(&if_clone_units, ifcu, ifcu_list);
+ /* XXX: reserve unit */
+
+
  ret = (*ifc->ifc_create)(ifc, unit);
 
- if (ret != 0 || (ifp = ifunit(name)) == NULL)
+ if (ret != 0 || (ifp = ifunit(name)) == NULL) {
+ LIST_REMOVE(ifcu, ifcu_list);
+ free(ifcu, M_TEMP, 0);
  return (ret);
+ }
 
  NET_LOCK();
  if_addgroup(ifp, ifc->ifc_name);
@@ -1275,15 +1307,18 @@ if_clone_destroy(const char *name)
 {
  struct if_clone *ifc;
  struct ifnet *ifp;
- int ret;
+ int unit, ret;
+
+ struct if_clone_unit *ifcu, *tifcu;
 
- ifc = if_clone_lookup(name, NULL);
+ ifc = if_clone_lookup(name, &unit);
  if (ifc == NULL)
  return (EINVAL);
 
  ifp = ifunit(name);
  if (ifp == NULL)
  return (ENXIO);
+ ifp->if_dying = 1;
 
  if (ifc->ifc_destroy == NULL)
  return (EOPNOTSUPP);
@@ -1298,6 +1333,15 @@ if_clone_destroy(const char *name)
  NET_UNLOCK();
  ret = (*ifc->ifc_destroy)(ifp);
 
+ /* XXX: release unit */
+ LIST_FOREACH_SAFE(ifcu, &if_clone_units, ifcu_list, tifcu) {
+ if (ifcu->ifcu_ifc == ifc && ifcu->ifcu_unit == unit) {
+ LIST_REMOVE(ifcu, ifcu_list);
+ free(ifcu, M_TEMP, 0);
+ }
+ }
+ /* XXX: release unit */
+
  return (ret);
 }
 
@@ -1749,8 +1793,11 @@ ifunit(const char *name)
  KERNEL_ASSERT_LOCKED();
 
  TAILQ_FOREACH(ifp, &ifnet, if_list) {
- if (strcmp(ifp->if_xname, name) == 0)
+ if (strcmp(ifp->if_xname, name) == 0) {
+ if (ifp->if_dying)
+ ifp = NULL;
  return (ifp);
+ }
  }
  return (NULL);
 }
@@ -1958,6 +2005,7 @@ ifioctl(struct socket *so, u_long cmd, c
  ifp = ifunit(ifr->ifr_name);
  if (ifp == NULL)
  return (ENXIO);
+ if_ref(ifp);
  oif_flags = ifp->if_flags;
  oif_xflags = ifp->if_xflags;
 
@@ -2314,6 +2362,7 @@ ifioctl(struct socket *so, u_long cmd, c
 
  if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
  getmicrotime(&ifp->if_lastchange);
+ if_put(ifp);
 
  return (error);
 }
@@ -2358,6 +2407,7 @@ ifioctl_get(u_long cmd, caddr_t data)
  ifp = ifunit(ifr->ifr_name);
  if (ifp == NULL)
  return (ENXIO);
+ if_ref(ifp);
 
  NET_RLOCK_IN_IOCTL();
 
@@ -2428,6 +2478,7 @@ ifioctl_get(u_long cmd, caddr_t data)
  }
 
  NET_RUNLOCK_IN_IOCTL();
+ if_put(ifp);
 
  return (error);
 }
@@ -2531,6 +2582,7 @@ ifconf(caddr_t data)
  if (space == 0) {
  TAILQ_FOREACH(ifp, &ifnet, if_list) {
  struct sockaddr *sa;
+ if_ref(ifp);
 
  if (TAILQ_EMPTY(&ifp->if_addrlist))
  space += sizeof (ifr);
@@ -2543,6 +2595,7 @@ ifconf(caddr_t data)
     sizeof(*sa);
  space += sizeof(ifr);
  }
+ if_put(ifp);
  }
  ifc->ifc_len = space;
  return (0);
@@ -2550,6 +2603,7 @@ ifconf(caddr_t data)
 
  ifrp = ifc->ifc_req;
  TAILQ_FOREACH(ifp, &ifnet, if_list) {
+ if_ref(ifp);
  if (space < sizeof(ifr))
  break;
  bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
@@ -2589,6 +2643,7 @@ ifconf(caddr_t data)
  break;
  space -= sizeof (ifr);
  }
+ if_put(ifp);
  }
  ifc->ifc_len -= space;
  return (error);
Index: sys/net/if_bridge.c
===================================================================
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.340
diff -u -p -r1.340 if_bridge.c
--- sys/net/if_bridge.c 24 Jun 2020 22:03:42 -0000 1.340
+++ sys/net/if_bridge.c 25 Jun 2020 09:30:46 -0000
@@ -234,7 +234,9 @@ bridge_clone_destroy(struct ifnet *ifp)
  bstp_destroy(sc->sc_stp);
 
  /* Undo pseudo-driver changes. */
+#if 0
  if_deactivate(ifp);
+#endif
 
  if_ih_remove(ifp, ether_input, NULL);
 
Index: sys/net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.105
diff -u -p -r1.105 if_var.h
--- sys/net/if_var.h 12 May 2020 08:49:54 -0000 1.105
+++ sys/net/if_var.h 25 Jun 2020 09:30:46 -0000
@@ -185,6 +185,7 @@ struct ifnet { /* and the entries */
  struct sockaddr_dl *if_sadl; /* [N] pointer to our sockaddr_dl */
 
  void *if_afdata[AF_MAX];
+ int if_dying;
 };
 #define if_mtu if_data.ifi_mtu
 #define if_type if_data.ifi_type

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Jason A. Donenfeld
In reply to this post by Martin Pieuchot
On Thu, Jun 25, 2020 at 2:49 AM Martin Pieuchot <[hidden email]> wrote:

>
> On 24/06/20(Wed) 19:54, Jason A. Donenfeld wrote:
> > On Wed, Jun 24, 2020 at 4:02 AM Martin Pieuchot <[hidden email]> wrote:
> > > Yes, that might be a better way.  If I understood your original mail the
> > > issue is related to ifunit(), right?  ifunit() is not used in packet-
> > > processing contexts, that's why we did not protect it by the NET_LOCK().
> > >
> > > I'm not sure if all the ifunit() usages are necessary, many of them are
> > > certainly exposing races with attach/destroy.
> >
> > No, not _just_ ifunit, but ifunit is one of the places where this is
> > hit. But just one.
>
> Which ones then?  I'd be delighted if you could share those facts.

You make it sound as if I'm deliberately concealing it in order to
dangle a tasty orange in front of you, but that's not what I meant.
Just look around in the source code at all of the code that uses an
ifp outside of a reference count or other locking semantics. Grepping
around, for example, I found
ifioctl->ifioctl_get->ifconf->if_list->kaboom.

There's lots of interesting behavior in there, and a pretty good
indication that you really don't want ioctls racing with either clone
or destroy, which is what my patch fixes.

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Martin Pieuchot
In reply to this post by Martin Pieuchot
On 23/06/20(Tue) 16:21, Martin Pieuchot wrote:
> On 23/06/20(Tue) 04:53, Jason A. Donenfeld wrote:
> > On 6/23/20, Martin Pieuchot <[hidden email]> wrote:
> > > On 23/06/20(Tue) 01:00, Jason A. Donenfeld wrote:
> > >> You can crash a system by running something like:
> > >>
> > >>     for i in 1 2 3; do while true; do ifconfig bridge0 create& ifconfig
> > >> bridge0 destroy& done& done

As mentioned already the proposed diff doesn't protect creation of cloning
devices via open(2).  The above test could be replaced by:

for i in 1 2 3; do while true; \
        do cat /dev/tun0& ifconfig tun0 destroy& done& done

The same could be applied to switch(4) or by replacing the cat(1) magic
with 'ifconfig bridge0 add vether0.

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Jason A. Donenfeld
In reply to this post by Vitaliy Makkoveev
On Thu, Jun 25, 2020 at 3:54 AM Vitaliy Makkoveev
<[hidden email]> wrote:
>         ifp = ifunit(name);
>         if (ifp == NULL)
>                 return (ENXIO);
> +       ifp->if_dying = 1;

Reference counting, plus an explicit tear-down window, and wait
period, like you've proposed sounds like a good idea. You'll probably
want to make if_dying volatile or cast it to that so that the compiler
doesn't reorder it. But I wonder, at this point, why not go full-on
srp/rcu?

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] net: prevent if_clone_destroy from racing with rest of stack

Vitaliy Makkoveev
In reply to this post by Martin Pieuchot
On Thu, Jun 25, 2020 at 11:54:48AM +0200, Martin Pieuchot wrote:

> On 23/06/20(Tue) 16:21, Martin Pieuchot wrote:
> > On 23/06/20(Tue) 04:53, Jason A. Donenfeld wrote:
> > > On 6/23/20, Martin Pieuchot <[hidden email]> wrote:
> > > > On 23/06/20(Tue) 01:00, Jason A. Donenfeld wrote:
> > > >> You can crash a system by running something like:
> > > >>
> > > >>     for i in 1 2 3; do while true; do ifconfig bridge0 create& ifconfig
> > > >> bridge0 destroy& done& done
>
> As mentioned already the proposed diff doesn't protect creation of cloning
> devices via open(2).  The above test could be replaced by:
>
> for i in 1 2 3; do while true; \
> do cat /dev/tun0& ifconfig tun0 destroy& done& done
>
> The same could be applied to switch(4) or by replacing the cat(1) magic
> with 'ifconfig bridge0 add vether0.
>

I used this [1] diff with a little modification to tun(4). This
modification is required because ifunit() doesn't know about reservaton
used by if_clone_create(). Also I grab a reference on obtained `ifp'.

I run the commands below. System is stable.

---- cut begin 1----
for i in 1 2 3; do while true; \
        do ifconfig tun0 create& cat /dev/tun0& \
                ifconfig tun0 destroy& done& done
---- cut end 1 ----

---- cut begin 2----
ifconfig vether0 create
for i in 1 2 3; do while true; do ifconfig bridge0 create& \
        ifconfig bridge0 add vether0& \
        ifconfig bridge0 destroy& done& done
---- cut end 2 ----

1. https://marc.info/?l=openbsd-tech&m=159307900124245&w=2


Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.610
diff -u -p -r1.610 if.c
--- sys/net/if.c 22 Jun 2020 09:45:13 -0000 1.610
+++ sys/net/if.c 25 Jun 2020 11:53:42 -0000
@@ -1236,6 +1236,18 @@ if_isconnected(const struct ifnet *ifp0,
  return (connected);
 }
 
+/* XXX: reserve unit */
+struct if_clone_unit {
+ LIST_ENTRY(if_clone_unit) ifcu_list;
+ struct if_clone *ifcu_ifc;
+ int ifcu_unit;
+};
+
+LIST_HEAD(, if_clone_unit) if_clone_units =
+ LIST_HEAD_INITIALIZER(if_clone_units);
+
+/* XXX: reserve unit */
+
 /*
  * Create a clone network interface.
  */
@@ -1246,6 +1258,8 @@ if_clone_create(const char *name, int rd
  struct ifnet *ifp;
  int unit, ret;
 
+ struct if_clone_unit *ifcu, *tifcu;
+
  ifc = if_clone_lookup(name, &unit);
  if (ifc == NULL)
  return (EINVAL);
@@ -1253,10 +1267,28 @@ if_clone_create(const char *name, int rd
  if (ifunit(name) != NULL)
  return (EEXIST);
 
+ /* XXX: reserve unit */
+ ifcu=malloc(sizeof(*ifcu), M_TEMP, M_WAITOK | M_ZERO);
+
+ LIST_FOREACH(tifcu, &if_clone_units, ifcu_list) {
+ if (tifcu->ifcu_ifc == ifc && tifcu->ifcu_unit == unit) {
+ free(ifcu, M_TEMP, 0);
+ return (EEXIST);
+ }
+ }
+ ifcu->ifcu_ifc = ifc;
+ ifcu->ifcu_unit = unit;
+ LIST_INSERT_HEAD(&if_clone_units, ifcu, ifcu_list);
+ /* XXX: reserve unit */
+
+
  ret = (*ifc->ifc_create)(ifc, unit);
 
- if (ret != 0 || (ifp = ifunit(name)) == NULL)
+ if (ret != 0 || (ifp = ifunit(name)) == NULL) {
+ LIST_REMOVE(ifcu, ifcu_list);
+ free(ifcu, M_TEMP, 0);
  return (ret);
+ }
 
  NET_LOCK();
  if_addgroup(ifp, ifc->ifc_name);
@@ -1275,15 +1307,18 @@ if_clone_destroy(const char *name)
 {
  struct if_clone *ifc;
  struct ifnet *ifp;
- int ret;
+ int unit, ret;
+
+ struct if_clone_unit *ifcu, *tifcu;
 
- ifc = if_clone_lookup(name, NULL);
+ ifc = if_clone_lookup(name, &unit);
  if (ifc == NULL)
  return (EINVAL);
 
  ifp = ifunit(name);
  if (ifp == NULL)
  return (ENXIO);
+ ifp->if_dying = 1;
 
  if (ifc->ifc_destroy == NULL)
  return (EOPNOTSUPP);
@@ -1298,6 +1333,15 @@ if_clone_destroy(const char *name)
  NET_UNLOCK();
  ret = (*ifc->ifc_destroy)(ifp);
 
+ /* XXX: release unit */
+ LIST_FOREACH_SAFE(ifcu, &if_clone_units, ifcu_list, tifcu) {
+ if (ifcu->ifcu_ifc == ifc && ifcu->ifcu_unit == unit) {
+ LIST_REMOVE(ifcu, ifcu_list);
+ free(ifcu, M_TEMP, 0);
+ }
+ }
+ /* XXX: release unit */
+
  return (ret);
 }
 
@@ -1749,8 +1793,11 @@ ifunit(const char *name)
  KERNEL_ASSERT_LOCKED();
 
  TAILQ_FOREACH(ifp, &ifnet, if_list) {
- if (strcmp(ifp->if_xname, name) == 0)
+ if (strcmp(ifp->if_xname, name) == 0) {
+ if (ifp->if_dying)
+ ifp = NULL;
  return (ifp);
+ }
  }
  return (NULL);
 }
@@ -1958,6 +2005,7 @@ ifioctl(struct socket *so, u_long cmd, c
  ifp = ifunit(ifr->ifr_name);
  if (ifp == NULL)
  return (ENXIO);
+ if_ref(ifp);
  oif_flags = ifp->if_flags;
  oif_xflags = ifp->if_xflags;
 
@@ -2314,6 +2362,7 @@ ifioctl(struct socket *so, u_long cmd, c
 
  if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
  getmicrotime(&ifp->if_lastchange);
+ if_put(ifp);
 
  return (error);
 }
@@ -2358,6 +2407,7 @@ ifioctl_get(u_long cmd, caddr_t data)
  ifp = ifunit(ifr->ifr_name);
  if (ifp == NULL)
  return (ENXIO);
+ if_ref(ifp);
 
  NET_RLOCK_IN_IOCTL();
 
@@ -2428,6 +2478,7 @@ ifioctl_get(u_long cmd, caddr_t data)
  }
 
  NET_RUNLOCK_IN_IOCTL();
+ if_put(ifp);
 
  return (error);
 }
@@ -2531,6 +2582,7 @@ ifconf(caddr_t data)
  if (space == 0) {
  TAILQ_FOREACH(ifp, &ifnet, if_list) {
  struct sockaddr *sa;
+ if_ref(ifp);
 
  if (TAILQ_EMPTY(&ifp->if_addrlist))
  space += sizeof (ifr);
@@ -2543,6 +2595,7 @@ ifconf(caddr_t data)
     sizeof(*sa);
  space += sizeof(ifr);
  }
+ if_put(ifp);
  }
  ifc->ifc_len = space;
  return (0);
@@ -2550,6 +2603,7 @@ ifconf(caddr_t data)
 
  ifrp = ifc->ifc_req;
  TAILQ_FOREACH(ifp, &ifnet, if_list) {
+ if_ref(ifp);
  if (space < sizeof(ifr))
  break;
  bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
@@ -2589,6 +2643,7 @@ ifconf(caddr_t data)
  break;
  space -= sizeof (ifr);
  }
+ if_put(ifp);
  }
  ifc->ifc_len -= space;
  return (error);
Index: sys/net/if_bridge.c
===================================================================
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.340
diff -u -p -r1.340 if_bridge.c
--- sys/net/if_bridge.c 24 Jun 2020 22:03:42 -0000 1.340
+++ sys/net/if_bridge.c 25 Jun 2020 11:53:42 -0000
@@ -234,7 +234,9 @@ bridge_clone_destroy(struct ifnet *ifp)
  bstp_destroy(sc->sc_stp);
 
  /* Undo pseudo-driver changes. */
+#if 0
  if_deactivate(ifp);
+#endif
 
  if_ih_remove(ifp, ether_input, NULL);
 
Index: sys/net/if_tun.c
===================================================================
RCS file: /cvs/src/sys/net/if_tun.c,v
retrieving revision 1.222
diff -u -p -r1.222 if_tun.c
--- sys/net/if_tun.c 13 May 2020 00:48:06 -0000 1.222
+++ sys/net/if_tun.c 25 Jun 2020 11:53:42 -0000
@@ -381,8 +381,10 @@ tun_dev_open(dev_t dev, const struct if_
  snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
  rdomain = rtable_l2(p->p_p->ps_rtableid);
 
+#if 0
  /* let's find or make an interface to work with */
  while ((ifp = ifunit(name)) == NULL) {
+ static u_int xxx=0;
  error = if_clone_create(name, rdomain);
  switch (error) {
  case 0: /* it's probably ours */
@@ -393,7 +395,18 @@ tun_dev_open(dev_t dev, const struct if_
  default:
  return (error);
  }
+ printf("race %u\n", xxx++);
  }
+#else
+restart:
+ error = if_clone_create(name, rdomain);
+
+ if( !(error == 0 || error == EEXIST))
+ return (error);
+ if ((ifp = ifunit(name)) == NULL)
+ goto restart;
+ ifp = if_get(ifp->if_index);
+#endif
 
  sc = ifp->if_softc;
  /* wait for it to be fully constructed before we use it */
@@ -401,12 +414,14 @@ tun_dev_open(dev_t dev, const struct if_
  error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
  if (error != 0) {
  /* XXX if_clone_destroy if stayup? */
+ if_put(ifp);
  return (error);
  }
  }
 
  if (sc->sc_dev != 0) {
  /* aww, we lost */
+ if_put(ifp);
  return (EBUSY);
  }
  /* it's ours now */
@@ -475,6 +490,7 @@ tun_dev_close(dev_t dev, struct proc *p)
  sc->sc_dev = 0;
 
  tun_put(sc);
+ if_put(ifp);
 
  if (destroy)
  if_clone_destroy(name);
Index: sys/net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.105
diff -u -p -r1.105 if_var.h
--- sys/net/if_var.h 12 May 2020 08:49:54 -0000 1.105
+++ sys/net/if_var.h 25 Jun 2020 11:53:42 -0000
@@ -185,6 +185,7 @@ struct ifnet { /* and the entries */
  struct sockaddr_dl *if_sadl; /* [N] pointer to our sockaddr_dl */
 
  void *if_afdata[AF_MAX];
+ int if_dying;
 };
 #define if_mtu if_data.ifi_mtu
 #define if_type if_data.ifi_type