Re: fix races in if_clone_create()
On Mon, Jul 13, 2020 at 12:52:15PM +0300, Vitaliy Makkoveev wrote: > On Mon, Jul 13, 2020 at 09:53:44AM +0200, Martin Pieuchot wrote: > > On 06/07/20(Mon) 15:44, Vitaliy Makkoveev wrote: > > > > On 6 Jul 2020, at 12:17, Martin Pieuchot wrote: > > > > Assertions and documentation are more important than preventing races > > > > because they allow to build awareness and elegant solutions instead of > > > > hacking diffs until stuff work without knowing why. > > > > > > > > There are two cases where `ifp' are inserted into `ifnet': > > > > 1. by autoconf during boot or hotplug > > > > 2. by cloning ioctl > > > > > > > > In the second case it is always about pseudo-devices. So the assertion > > > > should be conditional like: > > > > > > > > if (ISSET(ifp->if_xflags, IFXF_CLONED)) > > > > rw_assert_wrlock(_lock); > > > > > > > > In other words this fixes serializes insertions/removal on the global > > > > list `ifnet', the KERNEL_LOCK() being still required for reading it. > > > > > > > > Is there any other data structure which ends up being protected by this > > > > approach and could be documented? > > > > > > We should be sure there is no multiple `ifnet’s in `if_list’ with the same > > > `if_xname’. > > > > That's a symptom of a bug. Checking for a symptom won't prevent another > > type of corruption, maybe next time it will be a corrupted pointer? > > Absolutely no. You don't break the list do you understand this? > > > > > > And the assertion you proposed looks not obvious here. > > > > Why, is it because of the if() check? That's required unless we change > > put all if_attach() functions under the lock which would require changing > > all driver in-tree. However since drivers for physical devices are being > > attached without having multiple CPUs running there's no possible race. > > Because we should keep `if_list’ be linked by objects with unique > `if_xname’. The modificatios of this list is not the problem. Problem is > inconsistency caused by not unique `if_xname'. You are not fixing > problem, just simptom. > > > > > > Assertion like below looks more reasonable but introduces performance > > > impact. > > > > We should first aim for correctness then performance. In this case, > > performance is not even an issue because interfaces are not created > > often compared to the rate of processing packets. > > > > Well, so let's do "KASSERT(ifunit(ifp->if_xname) == NULL);" here. Updated diff below. Now we have assertion in if_attach{,head}(). Index: sys/net/if.c === RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.612 diff -u -p -r1.612 if.c --- sys/net/if.c10 Jul 2020 13:23:34 - 1.612 +++ sys/net/if.c13 Jul 2020 10:50:44 - @@ -155,6 +155,8 @@ int if_getgrouplist(caddr_t); void if_linkstate(struct ifnet *); void if_linkstate_task(void *); +intif_clone_lock(struct if_clone *); +void if_clone_unlock(struct if_clone *); intif_clone_list(struct if_clonereq *); struct if_clone*if_clone_lookup(const char *, int *); @@ -524,6 +526,7 @@ if_attachhead(struct ifnet *ifp) { if_attach_common(ifp); NET_LOCK(); + KASSERT(ifunit(ifp->if_xname) == NULL); TAILQ_INSERT_HEAD(, ifp, if_list); if_attachsetup(ifp); NET_UNLOCK(); @@ -534,6 +537,7 @@ if_attach(struct ifnet *ifp) { if_attach_common(ifp); NET_LOCK(); + KASSERT(ifunit(ifp->if_xname) == NULL); TAILQ_INSERT_TAIL(, ifp, if_list); if_attachsetup(ifp); NET_UNLOCK(); @@ -1244,27 +1248,35 @@ if_clone_create(const char *name, int rd { struct if_clone *ifc; struct ifnet *ifp; - int unit, ret; + int unit, error; ifc = if_clone_lookup(name, ); if (ifc == NULL) return (EINVAL); - if (ifunit(name) != NULL) - return (EEXIST); + error = if_clone_lock(ifc); + if (error != 0) + return (error); + + if (ifunit(name) != NULL) { + error = (EEXIST); + goto unlock; + } - ret = (*ifc->ifc_create)(ifc, unit); + error = (*ifc->ifc_create)(ifc, unit); - if (ret != 0 || (ifp = ifunit(name)) == NULL) - return (ret); + if (error != 0 || (ifp = ifunit(name)) == NULL) + goto unlock; NET_LOCK(); if_addgroup(ifp, ifc->ifc_name); if (rdomain != 0) if_setrdomain(ifp, rdomain); NET_UNLOCK(); +unlock: + if_clone_unlock(ifc); - return (ret); + return (error); } /* @@ -1275,18 +1287,26 @@ if_clone_destroy(const char *name) { struct if_clone *ifc; struct ifnet *ifp; - int ret; + int error; ifc = if_clone_lookup(name, NULL); if (ifc == NULL) return (EINVAL); + error =
Re: fix races in if_clone_create()
On Mon, Jul 13, 2020 at 09:53:44AM +0200, Martin Pieuchot wrote: > On 06/07/20(Mon) 15:44, Vitaliy Makkoveev wrote: > > > On 6 Jul 2020, at 12:17, Martin Pieuchot wrote: > > > Assertions and documentation are more important than preventing races > > > because they allow to build awareness and elegant solutions instead of > > > hacking diffs until stuff work without knowing why. > > > > > > There are two cases where `ifp' are inserted into `ifnet': > > > 1. by autoconf during boot or hotplug > > > 2. by cloning ioctl > > > > > > In the second case it is always about pseudo-devices. So the assertion > > > should be conditional like: > > > > > > if (ISSET(ifp->if_xflags, IFXF_CLONED)) > > > rw_assert_wrlock(_lock); > > > > > > In other words this fixes serializes insertions/removal on the global > > > list `ifnet', the KERNEL_LOCK() being still required for reading it. > > > > > > Is there any other data structure which ends up being protected by this > > > approach and could be documented? > > > > We should be sure there is no multiple `ifnet’s in `if_list’ with the same > > `if_xname’. > > That's a symptom of a bug. Checking for a symptom won't prevent another > type of corruption, maybe next time it will be a corrupted pointer? Absolutely no. You don't break the list do you understand this? > > > And the assertion you proposed looks not obvious here. > > Why, is it because of the if() check? That's required unless we change > put all if_attach() functions under the lock which would require changing > all driver in-tree. However since drivers for physical devices are being > attached without having multiple CPUs running there's no possible race. Because we should keep `if_list’ be linked by objects with unique `if_xname’. The modificatios of this list is not the problem. Problem is inconsistency caused by not unique `if_xname'. You are not fixing problem, just simptom. > > > Assertion like below looks more reasonable but introduces performance > > impact. > > We should first aim for correctness then performance. In this case, > performance is not even an issue because interfaces are not created > often compared to the rate of processing packets. > Well, so let's do "KASSERT(ifunit(ifp->if_xname) == NULL);" here.
Re: fix races in if_clone_create()
On 06/07/20(Mon) 15:44, Vitaliy Makkoveev wrote: > > On 6 Jul 2020, at 12:17, Martin Pieuchot wrote: > > Assertions and documentation are more important than preventing races > > because they allow to build awareness and elegant solutions instead of > > hacking diffs until stuff work without knowing why. > > > > There are two cases where `ifp' are inserted into `ifnet': > > 1. by autoconf during boot or hotplug > > 2. by cloning ioctl > > > > In the second case it is always about pseudo-devices. So the assertion > > should be conditional like: > > > > if (ISSET(ifp->if_xflags, IFXF_CLONED)) > > rw_assert_wrlock(_lock); > > > > In other words this fixes serializes insertions/removal on the global > > list `ifnet', the KERNEL_LOCK() being still required for reading it. > > > > Is there any other data structure which ends up being protected by this > > approach and could be documented? > > We should be sure there is no multiple `ifnet’s in `if_list’ with the same > `if_xname’. That's a symptom of a bug. Checking for a symptom won't prevent another type of corruption, maybe next time it will be a corrupted pointer? > And the assertion you proposed looks not obvious here. Why, is it because of the if() check? That's required unless we change put all if_attach() functions under the lock which would require changing all driver in-tree. However since drivers for physical devices are being attached without having multiple CPUs running there's no possible race. > Assertion like below looks more reasonable but introduces performance > impact. We should first aim for correctness then performance. In this case, performance is not even an issue because interfaces are not created often compared to the rate of processing packets.
Re: fix races in if_clone_create()
> On 6 Jul 2020, at 12:17, Martin Pieuchot wrote: > > On 01/07/20(Wed) 00:02, Vitaliy Makkoveev wrote: >> On Tue, Jun 30, 2020 at 03:48:22PM +0300, Vitaliy Makkoveev wrote: >>> On Tue, Jun 30, 2020 at 12:08:03PM +0200, Martin Pieuchot wrote: On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote: > [...] > I reworked tool for reproduce. Now I avoided fork()/exec() route and it > takes couple of minutes to take panic on 4 cores. Also some screenshots > attached. Setting kern.pool_debug=2 makes the race reproducible in seconds. >> >> Unfortunately you will catch splassert() caused by kern/sched_bsd.c:304. >> malloc() will call yield() while we are holding NET_LOCK(). I attached >> screenshot with splassertion to this mail. > > With kern.splassert < 3 it is fine. > Could you turn this test into something committable in regress/? We can link it to the build once a fix is committed. >>> >>> We have 3 races with cloned interfaces: >>> 1. if_clone_create() vs if_clone_create() >>> 2. if_clone_destroy() vs if_clone_destroy() >>> 3. if_clone_destroy() vs the rest of stack >>> >>> It makes sences to commit unified test to regress/, so I suggest to wait >>> a little. >> >> The another solution. >> >> Diff below introduces per-`ifc' serialization for if_clone_create() and >> if_clone_destroy(). There is no index bitmap anymore. > > I like the simplification. More comments below: > >> +/* >> + * Lock a clone network interface. >> + */ >> +int >> +if_clone_lock(struct if_clone *ifc) >> +{ >> +int error; >> + >> +rw_enter_write(>ifc_lock); >> + >> +while (ifc->ifc_flags & IFC_CREATE_LOCKED) { >> +ifc->ifc_flags |= IFC_CREATE_LOCKWAIT; >> +error = rwsleep_nsec(>ifc_flags, >ifc_lock, >> +PWAIT|PCATCH, "ifclk", INFSLP); >> +if(error != 0) { >> +ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT; >> +rw_exit_write(>ifc_lock); >> +return error; >> +} >> +} >> +ifc->ifc_flags |= IFC_CREATE_LOCKED; >> +ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT; >> + >> +rw_exit_write(>ifc_lock); >> + >> +return 0; >> +} > > This is like re-implementing a rwlock but loosing the debugging ability of > WITNESS. The reason to do this is to avoid call `ifc_create’ with rwlock held. We have unique sleep points for each underlaying routine for `ifc_create’, so this "rwlock reimplementation" looks better. Also this lock is used in one place only and impact of loosing debugging ability is not such big. > > I also don't see any reason for having a per-ifc lock. If, at least one > of the problems, is a double insert in `ifnet' then we should be able to > assert that a lock is held when doing such assertion. This race breaks ifunit() not `if_list’. I mean LIST_*() operation are not broken because `le_{prev,next}’ are valid, but list is inconsistent of course. Since only "ifconfig clonerA0 create& ifconfig clonerA0 create” will break, I see no reason to deny simultaneous execution of “ifconfig clonerA0 create& ifconfig clonerB0 create”. Let this lock be per `ifc’ not global. > > Assertions and documentation are more important than preventing races > because they allow to build awareness and elegant solutions instead of > hacking diffs until stuff work without knowing why. > > There are two cases where `ifp' are inserted into `ifnet': > 1. by autoconf during boot or hotplug > 2. by cloning ioctl > > In the second case it is always about pseudo-devices. So the assertion > should be conditional like: > > if (ISSET(ifp->if_xflags, IFXF_CLONED)) > rw_assert_wrlock(_lock); > > In other words this fixes serializes insertions/removal on the global > list `ifnet', the KERNEL_LOCK() being still required for reading it. > > Is there any other data structure which ends up being protected by this > approach and could be documented? We should be sure there is no multiple `ifnet’s in `if_list’ with the same `if_xname’. And the assertion you proposed looks not obvious here. Assertion like below looks more reasonable but introduces performance impact. cut begin void if_attach(struct ifnet *ifp) { if_attach_common(ifp); NET_LOCK(); KASSERT(ifunit(ifp->if_xname) == NULL); TAILQ_INSERT_TAIL(, ifp, if_list); if_attachsetup(ifp); NET_UNLOCK(); } cut end I guess the commentary within if_clone_create() is the best solution. Something like this: “Deny simultaneous execution to prevent multiple creation of interfaces with the same name”.
Re: fix races in if_clone_create()
On 01/07/20(Wed) 00:02, Vitaliy Makkoveev wrote: > On Tue, Jun 30, 2020 at 03:48:22PM +0300, Vitaliy Makkoveev wrote: > > On Tue, Jun 30, 2020 at 12:08:03PM +0200, Martin Pieuchot wrote: > > > On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote: > > > > [...] > > > > I reworked tool for reproduce. Now I avoided fork()/exec() route and it > > > > takes couple of minutes to take panic on 4 cores. Also some screenshots > > > > attached. > > > > > > Setting kern.pool_debug=2 makes the race reproducible in seconds. > > Unfortunately you will catch splassert() caused by kern/sched_bsd.c:304. > malloc() will call yield() while we are holding NET_LOCK(). I attached > screenshot with splassertion to this mail. With kern.splassert < 3 it is fine. > > > Could you turn this test into something committable in regress/? We can > > > link it to the build once a fix is committed. > > > > > > > We have 3 races with cloned interfaces: > > 1. if_clone_create() vs if_clone_create() > > 2. if_clone_destroy() vs if_clone_destroy() > > 3. if_clone_destroy() vs the rest of stack > > > > It makes sences to commit unified test to regress/, so I suggest to wait > > a little. > > The another solution. > > Diff below introduces per-`ifc' serialization for if_clone_create() and > if_clone_destroy(). There is no index bitmap anymore. I like the simplification. More comments below: > +/* > + * Lock a clone network interface. > + */ > +int > +if_clone_lock(struct if_clone *ifc) > +{ > + int error; > + > + rw_enter_write(>ifc_lock); > + > + while (ifc->ifc_flags & IFC_CREATE_LOCKED) { > + ifc->ifc_flags |= IFC_CREATE_LOCKWAIT; > + error = rwsleep_nsec(>ifc_flags, >ifc_lock, > + PWAIT|PCATCH, "ifclk", INFSLP); > + if(error != 0) { > + ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT; > + rw_exit_write(>ifc_lock); > + return error; > + } > + } > + ifc->ifc_flags |= IFC_CREATE_LOCKED; > + ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT; > + > + rw_exit_write(>ifc_lock); > + > + return 0; > +} This is like re-implementing a rwlock but loosing the debugging ability of WITNESS. I also don't see any reason for having a per-ifc lock. If, at least one of the problems, is a double insert in `ifnet' then we should be able to assert that a lock is held when doing such assertion. Assertions and documentation are more important than preventing races because they allow to build awareness and elegant solutions instead of hacking diffs until stuff work without knowing why. There are two cases where `ifp' are inserted into `ifnet': 1. by autoconf during boot or hotplug 2. by cloning ioctl In the second case it is always about pseudo-devices. So the assertion should be conditional like: if (ISSET(ifp->if_xflags, IFXF_CLONED)) rw_assert_wrlock(_lock); In other words this fixes serializes insertions/removal on the global list `ifnet', the KERNEL_LOCK() being still required for reading it. Is there any other data structure which ends up being protected by this approach and could be documented?
Re: fix races in if_clone_create()
ping? > On 1 Jul 2020, at 00:02, Vitaliy Makkoveev wrote: > > On Tue, Jun 30, 2020 at 03:48:22PM +0300, Vitaliy Makkoveev wrote: >> On Tue, Jun 30, 2020 at 12:08:03PM +0200, Martin Pieuchot wrote: >>> On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote: [...] I reworked tool for reproduce. Now I avoided fork()/exec() route and it takes couple of minutes to take panic on 4 cores. Also some screenshots attached. >>> >>> Setting kern.pool_debug=2 makes the race reproducible in seconds. > > Unfortunately you will catch splassert() caused by kern/sched_bsd.c:304. > malloc() will call yield() while we are holding NET_LOCK(). I attached > screenshot with splassertion to this mail. > >>> >>> Could you turn this test into something committable in regress/? We can >>> link it to the build once a fix is committed. >>> >> >> We have 3 races with cloned interfaces: >> 1. if_clone_create() vs if_clone_create() >> 2. if_clone_destroy() vs if_clone_destroy() >> 3. if_clone_destroy() vs the rest of stack >> >> It makes sences to commit unified test to regress/, so I suggest to wait >> a little. > > The another solution. > > Diff below introduces per-`ifc' serialization for if_clone_create() and > if_clone_destroy(). There is no index bitmap anymore. > > Diff fixes the following races: > 1. if_clone_create() vs if_clone_create() > 2. if_clone_destroy() vs if_clone_destroy() > > `ifc_create' will go the same lock path for each cloner, and > `ifc_destroy' will go this path but in reverse order. It seems > reasonable to allow simultaneous create/destroy for different cloners > but since different instances of one cloner will block each other it's > no reason have parallelism here. > > Updated test tool > cut begin > #include > #include > #include > #include > #include > #include > #include > #include > #include > #include > #include > > static struct ifreq ifr; > > static void *clone_create(void *arg) > { > int s; > > if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) > err(1, "socket()"); > while(1){ > if(ioctl(s, SIOCIFCREATE, )<0) > if(errno==EINVAL) > exit(1); > } > > return NULL; > } > > static void *clone_destroy(void *arg) > { > int s; > > if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) > err(1, "socket()"); > while(1){ > if(ioctl(s, SIOCIFDESTROY, )<0) > if(errno==EINVAL) > exit(1); > } > > return NULL; > } > > int main(int argc, char *argv[]) > { > pthread_t thr; > int i; > > if(argc!=2){ > fprintf(stderr, "usage: %s ifname\n", getprogname()); > return 1; > } > > if(getuid()!=0){ > fprintf(stderr, "should be root\n"); > return 1; > } > > memset(, 0, sizeof(ifr)); > strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name)); > > for(i=0; i<8*4; ++i){ > if(pthread_create(, NULL, clone_create, NULL)!=0) > errx(1, "pthread_create(clone_create)"); > if(pthread_create(, NULL, clone_destroy, NULL)!=0) > errx(1, "pthread_create(clone_destroy)"); > } > > select(0, NULL, NULL, NULL, NULL); > > return 0; > } > cut end > > > > Index: sys/net/if.c > === > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.611 > diff -u -p -r1.611 if.c > --- sys/net/if.c 30 Jun 2020 09:31:38 - 1.611 > +++ sys/net/if.c 30 Jun 2020 20:41:50 - > @@ -155,6 +155,8 @@ int if_getgrouplist(caddr_t); > void if_linkstate(struct ifnet *); > void if_linkstate_task(void *); > > +int if_clone_lock(struct if_clone *); > +void if_clone_unlock(struct if_clone *); > int if_clone_list(struct if_clonereq *); > struct if_clone *if_clone_lookup(const char *, int *); > > @@ -1244,27 +1246,35 @@ if_clone_create(const char *name, int rd > { > struct if_clone *ifc; > struct ifnet *ifp; > - int unit, ret; > + int unit, error; > > ifc = if_clone_lookup(name, ); > if (ifc == NULL) > return (EINVAL); > > - if (ifunit(name) != NULL) > - return (EEXIST); > + error = if_clone_lock(ifc); > + if (error != 0) > + return (error); > + > + if (ifunit(name) != NULL) { > + error = (EEXIST); > + goto unlock; > + } > > - ret = (*ifc->ifc_create)(ifc, unit); > + error = (*ifc->ifc_create)(ifc, unit); > > - if (ret != 0 || (ifp = ifunit(name)) == NULL) > - return (ret); > + if (error != 0 || (ifp = ifunit(name)) == NULL) > + goto unlock; > > NET_LOCK(); > if_addgroup(ifp, ifc->ifc_name); > if (rdomain != 0) >
Re: fix races in if_clone_create()
On Tue, Jun 30, 2020 at 03:48:22PM +0300, Vitaliy Makkoveev wrote: > On Tue, Jun 30, 2020 at 12:08:03PM +0200, Martin Pieuchot wrote: > > On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote: > > > [...] > > > I reworked tool for reproduce. Now I avoided fork()/exec() route and it > > > takes couple of minutes to take panic on 4 cores. Also some screenshots > > > attached. > > > > Setting kern.pool_debug=2 makes the race reproducible in seconds. Unfortunately you will catch splassert() caused by kern/sched_bsd.c:304. malloc() will call yield() while we are holding NET_LOCK(). I attached screenshot with splassertion to this mail. > > > > Could you turn this test into something committable in regress/? We can > > link it to the build once a fix is committed. > > > > We have 3 races with cloned interfaces: > 1. if_clone_create() vs if_clone_create() > 2. if_clone_destroy() vs if_clone_destroy() > 3. if_clone_destroy() vs the rest of stack > > It makes sences to commit unified test to regress/, so I suggest to wait > a little. The another solution. Diff below introduces per-`ifc' serialization for if_clone_create() and if_clone_destroy(). There is no index bitmap anymore. Diff fixes the following races: 1. if_clone_create() vs if_clone_create() 2. if_clone_destroy() vs if_clone_destroy() `ifc_create' will go the same lock path for each cloner, and `ifc_destroy' will go this path but in reverse order. It seems reasonable to allow simultaneous create/destroy for different cloners but since different instances of one cloner will block each other it's no reason have parallelism here. Updated test tool cut begin #include #include #include #include #include #include #include #include #include #include #include static struct ifreq ifr; static void *clone_create(void *arg) { int s; if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) err(1, "socket()"); while(1){ if(ioctl(s, SIOCIFCREATE, )<0) if(errno==EINVAL) exit(1); } return NULL; } static void *clone_destroy(void *arg) { int s; if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) err(1, "socket()"); while(1){ if(ioctl(s, SIOCIFDESTROY, )<0) if(errno==EINVAL) exit(1); } return NULL; } int main(int argc, char *argv[]) { pthread_t thr; int i; if(argc!=2){ fprintf(stderr, "usage: %s ifname\n", getprogname()); return 1; } if(getuid()!=0){ fprintf(stderr, "should be root\n"); return 1; } memset(, 0, sizeof(ifr)); strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name)); for(i=0; i<8*4; ++i){ if(pthread_create(, NULL, clone_create, NULL)!=0) errx(1, "pthread_create(clone_create)"); if(pthread_create(, NULL, clone_destroy, NULL)!=0) errx(1, "pthread_create(clone_destroy)"); } select(0, NULL, NULL, NULL, NULL); return 0; } cut end Index: sys/net/if.c === RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.611 diff -u -p -r1.611 if.c --- sys/net/if.c30 Jun 2020 09:31:38 - 1.611 +++ sys/net/if.c30 Jun 2020 20:41:50 - @@ -155,6 +155,8 @@ int if_getgrouplist(caddr_t); void if_linkstate(struct ifnet *); void if_linkstate_task(void *); +intif_clone_lock(struct if_clone *); +void if_clone_unlock(struct if_clone *); intif_clone_list(struct if_clonereq *); struct if_clone*if_clone_lookup(const char *, int *); @@ -1244,27 +1246,35 @@ if_clone_create(const char *name, int rd { struct if_clone *ifc; struct ifnet *ifp; - int unit, ret; + int unit, error; ifc = if_clone_lookup(name, ); if (ifc == NULL) return (EINVAL); - if (ifunit(name) != NULL) - return (EEXIST); + error = if_clone_lock(ifc); + if (error != 0) + return (error); + + if (ifunit(name) != NULL) { + error = (EEXIST); + goto unlock; + } - ret = (*ifc->ifc_create)(ifc, unit); + error = (*ifc->ifc_create)(ifc, unit); - if (ret != 0 || (ifp = ifunit(name)) == NULL) - return (ret); + if (error != 0 || (ifp = ifunit(name)) == NULL) + goto unlock; NET_LOCK(); if_addgroup(ifp, ifc->ifc_name); if (rdomain != 0) if_setrdomain(ifp, rdomain); NET_UNLOCK(); +unlock: + if_clone_unlock(ifc); - return (ret); + return (error); } /* @@ -1275,18 +1285,26 @@ if_clone_destroy(const char *name) { struct if_clone *ifc;
Re: fix races in if_clone_create()
On Tue, Jun 30, 2020 at 12:08:03PM +0200, Martin Pieuchot wrote: > On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote: > > [...] > > I reworked tool for reproduce. Now I avoided fork()/exec() route and it > > takes couple of minutes to take panic on 4 cores. Also some screenshots > > attached. > > Setting kern.pool_debug=2 makes the race reproducible in seconds. > > Could you turn this test into something committable in regress/? We can > link it to the build once a fix is committed. > We have 3 races with cloned interfaces: 1. if_clone_create() vs if_clone_create() 2. if_clone_destroy() vs if_clone_destroy() 3. if_clone_destroy() vs the rest of stack It makes sences to commit unified test to regress/, so I suggest to wait a little. > > #include > > #include > > #include > > #include > > #include > > #include > > #include > > #include > > #include > > #include > > > > static struct ifreq ifr; > > > > static void *clone_create(void *arg) > > { > > int s; > > > > if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) > > err(1, "socket()"); > > while(1){ > > if(ioctl(s, SIOCIFCREATE, )<0) > > if(errno==EINVAL) > > exit(1); > > } > > > > return NULL; > > } > > > > static void *clone_destroy(void *arg) > > { > > int s; > > > > if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) > > err(1, "socket()"); > > while(1){ > > if(ioctl(s, SIOCIFDESTROY, )<0) > > if(errno==EINVAL) > > exit(1); > > } > > > > return NULL; > > } > > > > int main(int argc, char *argv[]) > > { > > pthread_t thr; > > int i; > > > > if(argc!=2){ > > fprintf(stderr, "usage: %s ifname\n", getprogname()); > > return 1; > > } > > > > if(getuid()!=0){ > > fprintf(stderr, "should be root\n"); > > return 1; > > } > > > > memset(, 0, sizeof(ifr)); > > strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name)); > > > > for(i=0; i<8*4; ++i){ > > if(pthread_create(, NULL, clone_create, NULL)!=0) > > errx(1, "pthread_create(clone_create)"); > > } > > > > clone_destroy(NULL); > > > > return 0; > > } > > > > cut end > > > > >
Re: fix races in if_clone_create()
On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote: > [...] > I reworked tool for reproduce. Now I avoided fork()/exec() route and it > takes couple of minutes to take panic on 4 cores. Also some screenshots > attached. Setting kern.pool_debug=2 makes the race reproducible in seconds. Could you turn this test into something committable in regress/? We can link it to the build once a fix is committed. > #include > #include > #include > #include > #include > #include > #include > #include > #include > #include > > static struct ifreq ifr; > > static void *clone_create(void *arg) > { > int s; > > if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) > err(1, "socket()"); > while(1){ > if(ioctl(s, SIOCIFCREATE, )<0) > if(errno==EINVAL) > exit(1); > } > > return NULL; > } > > static void *clone_destroy(void *arg) > { > int s; > > if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) > err(1, "socket()"); > while(1){ > if(ioctl(s, SIOCIFDESTROY, )<0) > if(errno==EINVAL) > exit(1); > } > > return NULL; > } > > int main(int argc, char *argv[]) > { > pthread_t thr; > int i; > > if(argc!=2){ > fprintf(stderr, "usage: %s ifname\n", getprogname()); > return 1; > } > > if(getuid()!=0){ > fprintf(stderr, "should be root\n"); > return 1; > } > > memset(, 0, sizeof(ifr)); > strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name)); > > for(i=0; i<8*4; ++i){ > if(pthread_create(, NULL, clone_create, NULL)!=0) > errx(1, "pthread_create(clone_create)"); > } > > clone_destroy(NULL); > > return 0; > } > > cut end
Re: fix races in if_clone_create()
On Mon, Jun 29, 2020 at 04:27:50PM +0200, Hrvoje Popovski wrote: > On 29.6.2020. 10:59, Vitaliy Makkoveev wrote: > > I reworked tool for reproduce. Now I avoided fork()/exec() route and it > > takes couple of minutes to take panic on 4 cores. Also some screenshots > > attached. > > > > I hope anyone else will try it. > > Hi, > > i'm getting panic quite fast :) > i will leave box in ddb if more information is needed > Thanks. Right now it takes seconds to catch panic at least with switch(4), bridge(4), pflog(4), vether(4) and etherip(4). So you can leave ddb(4). I like to someone will try my solution for this issue. And reviews are welcomed :) The latest diff below. If the `unit' was obtained it's guaranteed that there is no pseudo interface with `name' is system. ifunit() now useless here and can be dropped. Index: sys/net/if.c === RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.610 diff -u -p -r1.610 if.c --- sys/net/if.c22 Jun 2020 09:45:13 - 1.610 +++ sys/net/if.c29 Jun 2020 13:54:29 - @@ -157,6 +157,8 @@ voidif_linkstate_task(void *); intif_clone_list(struct if_clonereq *); struct if_clone*if_clone_lookup(const char *, int *); +intif_clone_alloc_unit(struct if_clone *, int); +void if_clone_rele_unit(struct if_clone *, int); intif_group_egress_build(void); @@ -1244,19 +1246,18 @@ if_clone_create(const char *name, int rd { struct if_clone *ifc; struct ifnet *ifp; - int unit, ret; + int unit, error; ifc = if_clone_lookup(name, ); if (ifc == NULL) return (EINVAL); + error = if_clone_alloc_unit(ifc, unit); + if (error != 0) + return (error); - if (ifunit(name) != NULL) - return (EEXIST); - - ret = (*ifc->ifc_create)(ifc, unit); - - if (ret != 0 || (ifp = ifunit(name)) == NULL) - return (ret); + error = (*ifc->ifc_create)(ifc, unit); + if (error != 0 || (ifp = ifunit(name)) == NULL) + return (error); NET_LOCK(); if_addgroup(ifp, ifc->ifc_name); @@ -1264,7 +1265,7 @@ if_clone_create(const char *name, int rd if_setrdomain(ifp, rdomain); NET_UNLOCK(); - return (ret); + return (0); } /* @@ -1275,9 +1276,9 @@ if_clone_destroy(const char *name) { struct if_clone *ifc; struct ifnet *ifp; - int ret; + int ret, unit; - ifc = if_clone_lookup(name, NULL); + ifc = if_clone_lookup(name, ); if (ifc == NULL) return (EINVAL); @@ -1297,6 +1298,7 @@ if_clone_destroy(const char *name) } NET_UNLOCK(); ret = (*ifc->ifc_destroy)(ifp); + if_clone_rele_unit(ifc, unit); return (ret); } @@ -1342,12 +1344,95 @@ if_clone_lookup(const char *name, int *u unit = (unit * 10) + (*cp++ - '0'); } - if (unitp != NULL) - *unitp = unit; + *unitp = unit; return (ifc); } /* + * Allocate unit for cloned network interface. + */ +int if_clone_alloc_unit(struct if_clone *ifc, int unit) +{ + int word, bit, ret; + + word = unit / (sizeof(*ifc->ifc_map) * 8); + bit = unit % (sizeof(*ifc->ifc_map) * 8); + + rw_enter_write(>ifc_lock); + + if(word >= ifc->ifc_map_size) { + u_long *map; + int size; + + size = word + 1; + map = mallocarray(size, sizeof(*map), M_TEMP, M_WAITOK | + M_ZERO); + + if (ifc->ifc_map != NULL) { + memcpy(map, ifc->ifc_map, ifc->ifc_map_size); + free(ifc->ifc_map, M_TEMP, + ifc->ifc_map_size * sizeof(*map)); + } + + ifc->ifc_map = map; + ifc->ifc_map_size = size; + } + + if (ifc->ifc_map[word] & (1UL << bit)) + ret = EEXIST; + else { + ifc->ifc_map[word] |= (1UL << bit); + ret = 0; + } + + rw_exit_write(>ifc_lock); + + return ret; +} + +/* + * Release allocated unit for cloned network interface. + */ +void if_clone_rele_unit(struct if_clone *ifc, int unit) +{ + int word, bit; + + word = unit / (sizeof(*ifc->ifc_map) * 8); + bit = unit % (sizeof(*ifc->ifc_map) * 8); + + rw_enter_write(>ifc_lock); + KASSERT(word < ifc->ifc_map_size); + + ifc->ifc_map[word] &= ~(1UL << bit); + + if (ifc->ifc_map[word] == 0) { + u_long *map; + int size; + + size = ifc->ifc_map_size - 2; + while (size>=0) { + if (ifc->ifc_map[size] != 0) + break; + --size; + } + if (size<0) { + size = 0; +
Re: fix races in if_clone_create()
On Sat, Jun 27, 2020 at 12:10:24PM +0200, Martin Pieuchot wrote: > On 27/06/20(Sat) 00:35, Vitaliy Makkoveev wrote: > > On Fri, Jun 26, 2020 at 09:12:16PM +0200, Martin Pieuchot wrote: > > > On 26/06/20(Fri) 16:56, Vitaliy Makkoveev wrote: > > > > if_clone_create() has the races caused by context switch. > > > > > > Can you share a backtrace of such race? Where does the kernel panic? > > > > > > > This diff was inspired by thread [1]. As I explained [2] here is 3 > > issues that cause panics produced by command below: > > > > cut begin > > for i in 1 2 3; do while true; do ifconfig bridge0 create& \ > > ifconfig bridge0 destroy& done& done > > cut end > > Thanks, I couldn't reproduce it on any of the machines I tried. Did you > managed to reproduce it with other pseudo-devices or just with bridge0? > > > My system was stable with the last diff I did for thread [1]. But since > > this final diff [3] which include fixes for tun(4) is quick and dirty > > and not for commit I decided to make the diff to fix the races caused by > > if_clone_create() at first. > > > > I included screenshot with panic. > > Thanks, interesting that the corruption happens on a list that should be > initialized. Does that mean the context switch on Thread 1 is happening > before if_attach_common() is called? > > You said your previous email that there's a context switch. Do you know > when it happens? You could see that in ddb by looking at the backtrace > of the other CPU. > > Is the context switch leading to the race common to all pseudo-drivers > or is it in the bridge(4) driver? > > Regarding your solution, do I understand correctly that the goal is to > serialize all if_clone_create()? Is it really needed to remember which > unit is being currently created or can't we just serialize all of them? > > The fact that a lock is not held over the cloning operation is imho > positive. > I reworked tool for reproduce. Now I avoided fork()/exec() route and it takes couple of minutes to take panic on 4 cores. Also some screenshots attached. I hope anyone else will try it. cut begin #include #include #include #include #include #include #include #include #include #include static struct ifreq ifr; static void *clone_create(void *arg) { int s; if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) err(1, "socket()"); while(1){ if(ioctl(s, SIOCIFCREATE, )<0) if(errno==EINVAL) exit(1); } return NULL; } static void *clone_destroy(void *arg) { int s; if((s=socket(AF_INET, SOCK_DGRAM, 0))<0) err(1, "socket()"); while(1){ if(ioctl(s, SIOCIFDESTROY, )<0) if(errno==EINVAL) exit(1); } return NULL; } int main(int argc, char *argv[]) { pthread_t thr; int i; if(argc!=2){ fprintf(stderr, "usage: %s ifname\n", getprogname()); return 1; } if(getuid()!=0){ fprintf(stderr, "should be root\n"); return 1; } memset(, 0, sizeof(ifr)); strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name)); for(i=0; i<8*4; ++i){ if(pthread_create(, NULL, clone_create, NULL)!=0) errx(1, "pthread_create(clone_create)"); } clone_destroy(NULL); return 0; } cut end
Re: fix races in if_clone_create()
screenshot
Re: fix races in if_clone_create()
On 29.6.2020. 10:59, Vitaliy Makkoveev wrote: > I reworked tool for reproduce. Now I avoided fork()/exec() route and it > takes couple of minutes to take panic on 4 cores. Also some screenshots > attached. > > I hope anyone else will try it. Hi, i'm getting panic quite fast :) i will leave box in ddb if more information is needed r620-1# ./a.out bridge0 panic: kernel diagnostic assertion "TAILQ_EMPTY(>if_addrhooks)" failed: file "/sys/net/if.c", line 1168 Stopped at db_enter+0x10: popq%rbp TIDPIDUID PRFLAGS PFLAGS CPU COMMAND 475311 7753 1000 0x3 00 ifconfig *128110 3280 0 0x3 01K a.out 86419 3280 0 0x3 0x4004 a.out 352360 3280 0 0x3 0x4003 a.out 309715 3280 0 0x3 0x4005 a.out 268210 3280 0 0x3 0x4002 a.out db_enter() at db_enter+0x10 panic(81df42d3) at panic+0x128 __assert(81e5d55e,81e5b1fa,490,81e408d9) at __assert+0x2b if_detach(81169000) at if_detach+0x45f bridge_clone_destroy(81169000) at bridge_clone_destroy+0x17b ifioctl(fd839209c828,80206979,8000248fa980,800024902618) at ifioctl+0x1c2 soo_ioctl(fd83b04b34c8,80206979,8000248fa980,800024902618) at soo_ioctl+0x171 sys_ioctl(800024902618,8000248faa90,8000248faaf0) at sys_ioctl+0x2df syscall(8000248fab60) at syscall+0x389 Xsyscall() at Xsyscall+0x128 end of kernel end trace frame: 0x7f7d3600, count: 5 https://www.openbsd.org/ddb.html describes the minimum info required in bug reports. Insufficient info makes it difficult to find and fix bugs. ddb{1}>
Re: fix races in if_clone_create()
On Sat, Jun 27, 2020 at 12:10:24PM +0200, Martin Pieuchot wrote: > On 27/06/20(Sat) 00:35, Vitaliy Makkoveev wrote: > > On Fri, Jun 26, 2020 at 09:12:16PM +0200, Martin Pieuchot wrote: > > > On 26/06/20(Fri) 16:56, Vitaliy Makkoveev wrote: > > > > if_clone_create() has the races caused by context switch. > > > > > > Can you share a backtrace of such race? Where does the kernel panic? > > > > > > > This diff was inspired by thread [1]. As I explained [2] here is 3 > > issues that cause panics produced by command below: > > > > cut begin > > for i in 1 2 3; do while true; do ifconfig bridge0 create& \ > > ifconfig bridge0 destroy& done& done > > cut end > > Thanks, I couldn't reproduce it on any of the machines I tried. Did you > managed to reproduce it with other pseudo-devices or just with bridge0? > In thread [1] you talked about bridge(4), tun(4) and vether(4). A first I fixed races in if_clone_destroy() and I caught the races with if_clone_create() while I run your initial comman but with vether(4) cut begin for i in 0 1 2 3 4 5 6 7; do while true; \ do cat /dev/vether0& ifconfig vether0 destroy& done& done cut end It's hard to reproduce this issue. The best chances for me is bare metal 8 cores, fully unloaded system, no X, no active processes, test started at console and all output redirected to /dev/null. And it can take *hours* to catch. I can't reproduce this on 2 cores. I can't reproduce this at 4 cores under kvm but it's reproducible under virtual box under osx. The hosts has 8 cores. I can reproduce this on bare metal with 4 cores, but also it takes time. Routine called by `ifc_create' within if_clone_attach() is very specific to each pseudo interface. if_attach() is the only common point to sleep for them, but you also can sleep in any point of sleep before `ifc_create' will call if_attach(), For exmaple you will alloc software context with `M_WAITOK'. bridge(4) is just the best way to reproduce to me. You have all `ifnet's linked to `if_list'. ifunit() does linear search in this list by compare `ifp->if_xname' and given `name'. So if you inserted many `ifnet's to this list ifunit() will return you first. but if_get(9) doesn't work with this list. So if you have the case I talk above if_get(9) and ifname() are inconsistent. Some times in the stack you use if_get(9) sometimes you use ifunit() so you work every time with diffetrent `ifnet's with the same `if_xname'. You can't predict where `ifnet' will be corrupted. > > My system was stable with the last diff I did for thread [1]. But since > > this final diff [3] which include fixes for tun(4) is quick and dirty > > and not for commit I decided to make the diff to fix the races caused by > > if_clone_create() at first. > > > > I included screenshot with panic. > > Thanks, interesting that the corruption happens on a list that should be > initialized. Does that mean the context switch on Thread 1 is happening > before if_attach_common() is called? > I don't know where it was. if_attach() doesn't checks if `ifnet' with the name in `if_xname' already linked. You will insert passed `ifnet' in any cases. If you have more then one `ifnet' with identical `if_xname' you have broken ifunit() and if_get() logic. Look at if_attach(): cut begin if_attach(struct ifnet *ifp) { if_attach_common(ifp); NET_LOCK(); TAILQ_INSERT_TAIL(, ifp, if_list); /* (1) */ if_attachsetup(ifp); NET_UNLOCK(); } You link `ifp' at (1). And it's still your `ifp' before and after context switch ot without context switch. You will brake it later. The reason is pseudo driver received the same `unit' more than once. And it created two or more software context with identical `unit'. And internal pseudo driver's logic is broken. Also ifunit() and if_get(9) are inconsistent now. You can break memory everythere. cut end > You said your previous email that there's a context switch. Do you know > when it happens? You could see that in ddb by looking at the backtrace > of the other CPU. > > Is the context switch leading to the race common to all pseudo-drivers > or is it in the bridge(4) driver? ddb(4) is useless. The panic occured while we are trying to if_detach() already broken `ifnet'. There is no reces here. But the rases was *before* and we inserted two or more `ifnet's with the same name to `if_list'. This insertion is no panic condition. The first time I caught this races while I connected to you [1] thread. I inserted ifunit() call to if_attach() as below and received panic so I'am shure about the reason: cut begin if_attach(struct ifnet *ifp) { if_attach_common(ifp); NET_LOCK(); KASSERT(ifunit(ifp->if_xname)); TAILQ_INSERT_TAIL(, ifp, if_list); if_attachsetup(ifp); NET_UNLOCK(); } cut end But in thread [1] you said these races with pseudo interfaces are very old well know
Re: fix races in if_clone_create()
On 27/06/20(Sat) 00:35, Vitaliy Makkoveev wrote: > On Fri, Jun 26, 2020 at 09:12:16PM +0200, Martin Pieuchot wrote: > > On 26/06/20(Fri) 16:56, Vitaliy Makkoveev wrote: > > > if_clone_create() has the races caused by context switch. > > > > Can you share a backtrace of such race? Where does the kernel panic? > > > > This diff was inspired by thread [1]. As I explained [2] here is 3 > issues that cause panics produced by command below: > > cut begin > for i in 1 2 3; do while true; do ifconfig bridge0 create& \ > ifconfig bridge0 destroy& done& done > cut end Thanks, I couldn't reproduce it on any of the machines I tried. Did you managed to reproduce it with other pseudo-devices or just with bridge0? > My system was stable with the last diff I did for thread [1]. But since > this final diff [3] which include fixes for tun(4) is quick and dirty > and not for commit I decided to make the diff to fix the races caused by > if_clone_create() at first. > > I included screenshot with panic. Thanks, interesting that the corruption happens on a list that should be initialized. Does that mean the context switch on Thread 1 is happening before if_attach_common() is called? You said your previous email that there's a context switch. Do you know when it happens? You could see that in ddb by looking at the backtrace of the other CPU. Is the context switch leading to the race common to all pseudo-drivers or is it in the bridge(4) driver? Regarding your solution, do I understand correctly that the goal is to serialize all if_clone_create()? Is it really needed to remember which unit is being currently created or can't we just serialize all of them? The fact that a lock is not held over the cloning operation is imho positive.
Re: fix races in if_clone_create()
On Fri, Jun 26, 2020 at 09:12:16PM +0200, Martin Pieuchot wrote: > On 26/06/20(Fri) 16:56, Vitaliy Makkoveev wrote: > > if_clone_create() has the races caused by context switch. > > Can you share a backtrace of such race? Where does the kernel panic? > This diff was inspired by thread [1]. As I explained [2] here is 3 issues that cause panics produced by command below: cut begin for i in 1 2 3; do while true; do ifconfig bridge0 create& \ ifconfig bridge0 destroy& done& done cut end My system was stable with the last diff I did for thread [1]. But since this final diff [3] which include fixes for tun(4) is quick and dirty and not for commit I decided to make the diff to fix the races caused by if_clone_create() at first. I included screenshot with panic. Also the code to reproduce below: cut begin #!/bin/sh while true; do ifconfig bridge0 create& done& while true; do ifconfig bridge0 destroy done cut end 1. https://marc.info/?t=15928959011=1=2 2. https://marc.info/?l=openbsd-tech=159307900124245=2 3. https://marc.info/?l=openbsd-tech=159308633126243=2
Re: fix races in if_clone_create()
Sorry, I found a memory leak. Updated diff below. Index: sys/net/if.c === RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.610 diff -u -p -r1.610 if.c --- sys/net/if.c22 Jun 2020 09:45:13 - 1.610 +++ sys/net/if.c26 Jun 2020 17:19:41 - @@ -157,6 +157,8 @@ voidif_linkstate_task(void *); intif_clone_list(struct if_clonereq *); struct if_clone*if_clone_lookup(const char *, int *); +intif_clone_alloc_unit(struct if_clone *, int); +void if_clone_rele_unit(struct if_clone *, int); intif_group_egress_build(void); @@ -1244,19 +1246,23 @@ if_clone_create(const char *name, int rd { struct if_clone *ifc; struct ifnet *ifp; - int unit, ret; + int unit, error; ifc = if_clone_lookup(name, ); if (ifc == NULL) return (EINVAL); + error = if_clone_alloc_unit(ifc, unit); + if (error != 0) + return (error); - if (ifunit(name) != NULL) - return (EEXIST); - - ret = (*ifc->ifc_create)(ifc, unit); + if (ifunit(name) != NULL) { + error = (EEXIST); + goto rele; + } - if (ret != 0 || (ifp = ifunit(name)) == NULL) - return (ret); + error = (*ifc->ifc_create)(ifc, unit); + if (error != 0 || (ifp = ifunit(name)) == NULL) + return (0); NET_LOCK(); if_addgroup(ifp, ifc->ifc_name); @@ -1264,7 +1270,10 @@ if_clone_create(const char *name, int rd if_setrdomain(ifp, rdomain); NET_UNLOCK(); - return (ret); + return (0); +rele: + if_clone_rele_unit(ifc, unit); + return (error); } /* @@ -1275,9 +1284,9 @@ if_clone_destroy(const char *name) { struct if_clone *ifc; struct ifnet *ifp; - int ret; + int ret, unit; - ifc = if_clone_lookup(name, NULL); + ifc = if_clone_lookup(name, ); if (ifc == NULL) return (EINVAL); @@ -1297,6 +1306,7 @@ if_clone_destroy(const char *name) } NET_UNLOCK(); ret = (*ifc->ifc_destroy)(ifp); + if_clone_rele_unit(ifc, unit); return (ret); } @@ -1342,12 +1352,95 @@ if_clone_lookup(const char *name, int *u unit = (unit * 10) + (*cp++ - '0'); } - if (unitp != NULL) - *unitp = unit; + *unitp = unit; return (ifc); } /* + * Allocate unit for cloned network interface. + */ +int if_clone_alloc_unit(struct if_clone *ifc, int unit) +{ + int word, bit, ret; + + word = unit / (sizeof(*ifc->ifc_map) * 8); + bit = unit % (sizeof(*ifc->ifc_map) * 8); + + rw_enter_write(>ifc_lock); + + if(word >= ifc->ifc_map_size) { + u_long *map; + int size; + + size = word + 1; + map = mallocarray(size, sizeof(*map), M_TEMP, M_WAITOK | + M_ZERO); + + if (ifc->ifc_map != NULL) { + memcpy(map, ifc->ifc_map, ifc->ifc_map_size); + free(ifc->ifc_map, M_TEMP, + ifc->ifc_map_size * sizeof(*map)); + } + + ifc->ifc_map = map; + ifc->ifc_map_size = size; + } + + if (ifc->ifc_map[word] & (1UL << bit)) + ret = EEXIST; + else { + ifc->ifc_map[word] |= (1UL << bit); + ret = 0; + } + + rw_exit_write(>ifc_lock); + + return ret; +} + +/* + * Release allocated unit for cloned network interface. + */ +void if_clone_rele_unit(struct if_clone *ifc, int unit) +{ + int word, bit; + + word = unit / (sizeof(*ifc->ifc_map) * 8); + bit = unit % (sizeof(*ifc->ifc_map) * 8); + + rw_enter_write(>ifc_lock); + KASSERT(word < ifc->ifc_map_size); + + ifc->ifc_map[word] &= ~(1UL << bit); + + if (ifc->ifc_map[word] == 0) { + u_long *map; + int size; + + size = ifc->ifc_map_size - 2; + while (size>=0) { + if (ifc->ifc_map[size] != 0) + break; + --size; + } + if (size<0) { + size = 0; + map = NULL; + } else { + size += 1; + map = mallocarray(size, sizeof(*map), M_TEMP, + M_WAITOK); + memcpy(map, ifc->ifc_map, size); + } + + free(ifc->ifc_map, M_TEMP, ifc->ifc_map_size * sizeof(*map)); + ifc->ifc_map = map; + ifc->ifc_map_size = size; + } + rw_exit_write(>ifc_lock); +} + +/* * Register a network interface cloner. */ void @@ -1360,6 +1453,7 @@ if_clone_attach(struct if_clone *ifc) *
Re: fix races in if_clone_create()
On 26/06/20(Fri) 16:56, Vitaliy Makkoveev wrote: > if_clone_create() has the races caused by context switch. Can you share a backtrace of such race? Where does the kernel panic?