Re: if_get NULL race arp, nd6, igmp

2022-03-26 Thread Denis Fondras
Le Tue, Mar 22, 2022 at 02:57:31PM +0100, Alexander Bluhm a écrit :
> anyone?
> 

It looks OK denis@

> On Fri, Mar 04, 2022 at 12:09:03PM +0100, Alexander Bluhm wrote:
> > Hi,
> > 
> > syzkaller has found this race in arp.
> > 
> > https://syzkaller.appspot.com/bug?id=e3dc94533ddee95b6d69c2e7049360022f4190d3
> > 
> > The assumption of the code is that either the arp entry or the
> > interface is removed.
> > 
> > But in if_detach() if_remove() is called without net lock and all
> > arp entries are removed later in in_ifdetach() -> in_purgeaddr()
> > -> rt_ifa_purge() -> rtdeletemsg().
> > 
> > When the arp timeout fires while if_detach() is between if_remove()
> > and NET_LOCK() then arptfree() has do deal with partially destroyed
> > interfaces.  We can skip rtdeletemsg() as if_detach() will take
> > care of it.
> > 
> > 
> > While syzkaller has not found it, nd6 has to deal with the same
> > problem.  Make nd6_free() simmilar to arptfree().
> > 
> > 
> > This crash may have the same source of problem.
> > 
> > https://syzkaller.appspot.com/bug?id=9649f7319437a49298a38572b83f38f0b7d37fbe
> > 
> > if_detach() does if_remove(ifp); NET_LOCK(); rti_delete().  So new
> > igmp groups may appear during interface destruction.  igmp_joingroup()
> > does not call rti_fill() as if_get() fails.  Then inm->inm_rti may
> > be NULL.  This is the condition when syzkaller crashes in
> > igmp_leavegroup().
> > 
> > When we pass the ifp this CPU is already holding, we avoid half
> > constructed igmp groups.  Calling if_get() multiple times in caller
> > and callee makes no sense anyway.
> > 
> > ok?  partial ok for one of the fixes also welcome.
> > 
> > bluhm
> > 
> > 
> > Index: netinet/if_ether.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> > retrieving revision 1.248
> > diff -u -p -r1.248 if_ether.c
> > --- netinet/if_ether.c  28 Apr 2021 21:21:44 -  1.248
> > +++ netinet/if_ether.c  3 Mar 2022 23:31:55 -
> > @@ -722,7 +722,9 @@ arptfree(struct rtentry *rt)
> > arpinvalidate(rt);
> >  
> > ifp = if_get(rt->rt_ifidx);
> > -   KASSERT(ifp != NULL);
> > +   if (ifp == NULL)
> > +   return;
> > +
> > if (!ISSET(rt->rt_flags, RTF_STATIC|RTF_CACHED))
> > rtdeletemsg(rt, ifp, ifp->if_rdomain);
> > if_put(ifp);
> > Index: netinet/igmp.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp.c,v
> > retrieving revision 1.77
> > diff -u -p -r1.77 igmp.c
> > --- netinet/igmp.c  15 Dec 2021 15:58:01 -  1.77
> > +++ netinet/igmp.c  3 Mar 2022 23:58:32 -
> > @@ -483,17 +483,14 @@ igmp_input_if(struct ifnet *ifp, struct 
> >  }
> >  
> >  void
> > -igmp_joingroup(struct in_multi *inm)
> > +igmp_joingroup(struct in_multi *inm, struct ifnet *ifp)
> >  {
> > -   struct ifnet* ifp;
> > int i;
> >  
> > -   ifp = if_get(inm->inm_ifidx);
> > -
> > inm->inm_state = IGMP_IDLE_MEMBER;
> >  
> > if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
> > -   ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
> > +   (ifp->if_flags & IFF_LOOPBACK) == 0) {
> > i = rti_fill(inm);
> > igmp_sendpkt(ifp, inm, i, 0);
> > inm->inm_state = IGMP_DELAYING_MEMBER;
> > @@ -502,22 +499,16 @@ igmp_joingroup(struct in_multi *inm)
> > igmp_timers_are_running = 1;
> > } else
> > inm->inm_timer = 0;
> > -
> > -   if_put(ifp);
> >  }
> >  
> >  void
> > -igmp_leavegroup(struct in_multi *inm)
> > +igmp_leavegroup(struct in_multi *inm, struct ifnet *ifp)
> >  {
> > -   struct ifnet* ifp;
> > -
> > -   ifp = if_get(inm->inm_ifidx);
> > -
> > switch (inm->inm_state) {
> > case IGMP_DELAYING_MEMBER:
> > case IGMP_IDLE_MEMBER:
> > if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
> > -   ifp && (ifp->if_flags & IFF_LOOPBACK) == 0)
> > +   (ifp->if_flags & IFF_LOOPBACK) == 0)
> > if (inm->inm_rti->rti_type != IGMP_v1_ROUTER)
> > igmp_sendpkt(ifp, inm,
> > IGMP_HOST_LEAVE_MESSAGE,
> > @@ -528,7 +519,6 @@ igmp_leavegroup(struct in_multi *inm)
> > case IGMP_SLEEPING_MEMBER:
> > break;
> > }
> > -   if_put(ifp);
> >  }
> >  
> >  void
> > Index: netinet/igmp_var.h
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp_var.h,v
> > retrieving revision 1.14
> > diff -u -p -r1.14 igmp_var.h
> > --- netinet/igmp_var.h  17 Aug 2020 16:25:34 -  1.14
> > +++ netinet/igmp_var.h  4 Mar 2022 00:02:45 -
> > @@ -107,8 +107,8 @@ igmpstat_inc(enum igmpstat_counters c)
> >  
> >  void   igmp_init(void);
> >  intigmp_input(struct mbuf **, int *, int, int);
> > -void   igmp_joingroup(struct in_multi *);
> > -void   

Re: if_get NULL race arp, nd6, igmp

2022-03-22 Thread Alexander Bluhm
anyone?

On Fri, Mar 04, 2022 at 12:09:03PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> syzkaller has found this race in arp.
> 
> https://syzkaller.appspot.com/bug?id=e3dc94533ddee95b6d69c2e7049360022f4190d3
> 
> The assumption of the code is that either the arp entry or the
> interface is removed.
> 
> But in if_detach() if_remove() is called without net lock and all
> arp entries are removed later in in_ifdetach() -> in_purgeaddr()
> -> rt_ifa_purge() -> rtdeletemsg().
> 
> When the arp timeout fires while if_detach() is between if_remove()
> and NET_LOCK() then arptfree() has do deal with partially destroyed
> interfaces.  We can skip rtdeletemsg() as if_detach() will take
> care of it.
> 
> 
> While syzkaller has not found it, nd6 has to deal with the same
> problem.  Make nd6_free() simmilar to arptfree().
> 
> 
> This crash may have the same source of problem.
> 
> https://syzkaller.appspot.com/bug?id=9649f7319437a49298a38572b83f38f0b7d37fbe
> 
> if_detach() does if_remove(ifp); NET_LOCK(); rti_delete().  So new
> igmp groups may appear during interface destruction.  igmp_joingroup()
> does not call rti_fill() as if_get() fails.  Then inm->inm_rti may
> be NULL.  This is the condition when syzkaller crashes in
> igmp_leavegroup().
> 
> When we pass the ifp this CPU is already holding, we avoid half
> constructed igmp groups.  Calling if_get() multiple times in caller
> and callee makes no sense anyway.
> 
> ok?  partial ok for one of the fixes also welcome.
> 
> bluhm
> 
> 
> Index: netinet/if_ether.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> retrieving revision 1.248
> diff -u -p -r1.248 if_ether.c
> --- netinet/if_ether.c28 Apr 2021 21:21:44 -  1.248
> +++ netinet/if_ether.c3 Mar 2022 23:31:55 -
> @@ -722,7 +722,9 @@ arptfree(struct rtentry *rt)
>   arpinvalidate(rt);
>  
>   ifp = if_get(rt->rt_ifidx);
> - KASSERT(ifp != NULL);
> + if (ifp == NULL)
> + return;
> +
>   if (!ISSET(rt->rt_flags, RTF_STATIC|RTF_CACHED))
>   rtdeletemsg(rt, ifp, ifp->if_rdomain);
>   if_put(ifp);
> Index: netinet/igmp.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp.c,v
> retrieving revision 1.77
> diff -u -p -r1.77 igmp.c
> --- netinet/igmp.c15 Dec 2021 15:58:01 -  1.77
> +++ netinet/igmp.c3 Mar 2022 23:58:32 -
> @@ -483,17 +483,14 @@ igmp_input_if(struct ifnet *ifp, struct 
>  }
>  
>  void
> -igmp_joingroup(struct in_multi *inm)
> +igmp_joingroup(struct in_multi *inm, struct ifnet *ifp)
>  {
> - struct ifnet* ifp;
>   int i;
>  
> - ifp = if_get(inm->inm_ifidx);
> -
>   inm->inm_state = IGMP_IDLE_MEMBER;
>  
>   if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
> - ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
> + (ifp->if_flags & IFF_LOOPBACK) == 0) {
>   i = rti_fill(inm);
>   igmp_sendpkt(ifp, inm, i, 0);
>   inm->inm_state = IGMP_DELAYING_MEMBER;
> @@ -502,22 +499,16 @@ igmp_joingroup(struct in_multi *inm)
>   igmp_timers_are_running = 1;
>   } else
>   inm->inm_timer = 0;
> -
> - if_put(ifp);
>  }
>  
>  void
> -igmp_leavegroup(struct in_multi *inm)
> +igmp_leavegroup(struct in_multi *inm, struct ifnet *ifp)
>  {
> - struct ifnet* ifp;
> -
> - ifp = if_get(inm->inm_ifidx);
> -
>   switch (inm->inm_state) {
>   case IGMP_DELAYING_MEMBER:
>   case IGMP_IDLE_MEMBER:
>   if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
> - ifp && (ifp->if_flags & IFF_LOOPBACK) == 0)
> + (ifp->if_flags & IFF_LOOPBACK) == 0)
>   if (inm->inm_rti->rti_type != IGMP_v1_ROUTER)
>   igmp_sendpkt(ifp, inm,
>   IGMP_HOST_LEAVE_MESSAGE,
> @@ -528,7 +519,6 @@ igmp_leavegroup(struct in_multi *inm)
>   case IGMP_SLEEPING_MEMBER:
>   break;
>   }
> - if_put(ifp);
>  }
>  
>  void
> Index: netinet/igmp_var.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp_var.h,v
> retrieving revision 1.14
> diff -u -p -r1.14 igmp_var.h
> --- netinet/igmp_var.h17 Aug 2020 16:25:34 -  1.14
> +++ netinet/igmp_var.h4 Mar 2022 00:02:45 -
> @@ -107,8 +107,8 @@ igmpstat_inc(enum igmpstat_counters c)
>  
>  void igmp_init(void);
>  int  igmp_input(struct mbuf **, int *, int, int);
> -void igmp_joingroup(struct in_multi *);
> -void igmp_leavegroup(struct in_multi *);
> +void igmp_joingroup(struct in_multi *, struct ifnet *);
> +void igmp_leavegroup(struct in_multi *, struct ifnet *);
>  void igmp_fasttimo(void);
>  void igmp_slowtimo(void);
>  int  igmp_sysctl(int *, u_int, void *, size_t *, void *, size_t);
> Index: netinet/in.c
> 

if_get NULL race arp, nd6, igmp

2022-03-04 Thread Alexander Bluhm
Hi,

syzkaller has found this race in arp.

https://syzkaller.appspot.com/bug?id=e3dc94533ddee95b6d69c2e7049360022f4190d3

The assumption of the code is that either the arp entry or the
interface is removed.

But in if_detach() if_remove() is called without net lock and all
arp entries are removed later in in_ifdetach() -> in_purgeaddr()
-> rt_ifa_purge() -> rtdeletemsg().

When the arp timeout fires while if_detach() is between if_remove()
and NET_LOCK() then arptfree() has do deal with partially destroyed
interfaces.  We can skip rtdeletemsg() as if_detach() will take
care of it.


While syzkaller has not found it, nd6 has to deal with the same
problem.  Make nd6_free() simmilar to arptfree().


This crash may have the same source of problem.

https://syzkaller.appspot.com/bug?id=9649f7319437a49298a38572b83f38f0b7d37fbe

if_detach() does if_remove(ifp); NET_LOCK(); rti_delete().  So new
igmp groups may appear during interface destruction.  igmp_joingroup()
does not call rti_fill() as if_get() fails.  Then inm->inm_rti may
be NULL.  This is the condition when syzkaller crashes in
igmp_leavegroup().

When we pass the ifp this CPU is already holding, we avoid half
constructed igmp groups.  Calling if_get() multiple times in caller
and callee makes no sense anyway.

ok?  partial ok for one of the fixes also welcome.

bluhm


Index: netinet/if_ether.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.248
diff -u -p -r1.248 if_ether.c
--- netinet/if_ether.c  28 Apr 2021 21:21:44 -  1.248
+++ netinet/if_ether.c  3 Mar 2022 23:31:55 -
@@ -722,7 +722,9 @@ arptfree(struct rtentry *rt)
arpinvalidate(rt);
 
ifp = if_get(rt->rt_ifidx);
-   KASSERT(ifp != NULL);
+   if (ifp == NULL)
+   return;
+
if (!ISSET(rt->rt_flags, RTF_STATIC|RTF_CACHED))
rtdeletemsg(rt, ifp, ifp->if_rdomain);
if_put(ifp);
Index: netinet/igmp.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp.c,v
retrieving revision 1.77
diff -u -p -r1.77 igmp.c
--- netinet/igmp.c  15 Dec 2021 15:58:01 -  1.77
+++ netinet/igmp.c  3 Mar 2022 23:58:32 -
@@ -483,17 +483,14 @@ igmp_input_if(struct ifnet *ifp, struct 
 }
 
 void
-igmp_joingroup(struct in_multi *inm)
+igmp_joingroup(struct in_multi *inm, struct ifnet *ifp)
 {
-   struct ifnet* ifp;
int i;
 
-   ifp = if_get(inm->inm_ifidx);
-
inm->inm_state = IGMP_IDLE_MEMBER;
 
if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
-   ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
+   (ifp->if_flags & IFF_LOOPBACK) == 0) {
i = rti_fill(inm);
igmp_sendpkt(ifp, inm, i, 0);
inm->inm_state = IGMP_DELAYING_MEMBER;
@@ -502,22 +499,16 @@ igmp_joingroup(struct in_multi *inm)
igmp_timers_are_running = 1;
} else
inm->inm_timer = 0;
-
-   if_put(ifp);
 }
 
 void
-igmp_leavegroup(struct in_multi *inm)
+igmp_leavegroup(struct in_multi *inm, struct ifnet *ifp)
 {
-   struct ifnet* ifp;
-
-   ifp = if_get(inm->inm_ifidx);
-
switch (inm->inm_state) {
case IGMP_DELAYING_MEMBER:
case IGMP_IDLE_MEMBER:
if (!IN_LOCAL_GROUP(inm->inm_addr.s_addr) &&
-   ifp && (ifp->if_flags & IFF_LOOPBACK) == 0)
+   (ifp->if_flags & IFF_LOOPBACK) == 0)
if (inm->inm_rti->rti_type != IGMP_v1_ROUTER)
igmp_sendpkt(ifp, inm,
IGMP_HOST_LEAVE_MESSAGE,
@@ -528,7 +519,6 @@ igmp_leavegroup(struct in_multi *inm)
case IGMP_SLEEPING_MEMBER:
break;
}
-   if_put(ifp);
 }
 
 void
Index: netinet/igmp_var.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/igmp_var.h,v
retrieving revision 1.14
diff -u -p -r1.14 igmp_var.h
--- netinet/igmp_var.h  17 Aug 2020 16:25:34 -  1.14
+++ netinet/igmp_var.h  4 Mar 2022 00:02:45 -
@@ -107,8 +107,8 @@ igmpstat_inc(enum igmpstat_counters c)
 
 void   igmp_init(void);
 intigmp_input(struct mbuf **, int *, int, int);
-void   igmp_joingroup(struct in_multi *);
-void   igmp_leavegroup(struct in_multi *);
+void   igmp_joingroup(struct in_multi *, struct ifnet *);
+void   igmp_leavegroup(struct in_multi *, struct ifnet *);
 void   igmp_fasttimo(void);
 void   igmp_slowtimo(void);
 intigmp_sysctl(int *, u_int, void *, size_t *, void *, size_t);
Index: netinet/in.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in.c,v
retrieving revision 1.171
diff -u -p -r1.171 in.c
--- netinet/in.c10 Mar 2021 10:21:48 -  1.171
+++ netinet/in.c3 Mar 2022 23:57:49 -
@@