On Tue, Mar 22, 2022 at 02:25:08PM +0100, Claudio Jeker wrote:
> On Tue, Mar 22, 2022 at 02:09:51PM +0100, Alexander Bluhm wrote:
> > Hi,
> > 
> > syzkaller and witness found the same bug I introduced in UDP also
> > for Raw IP.  Fix it the same was for rip and rip6.
> > 
> > https://syzkaller.appspot.com/bug?extid=9bac6356a881dc644265
> > https://syzkaller.appspot.com/bug?extid=5b2679ee9be0895d26f9
> > 
> > ok?
> 
> Absolutly not a fan of this "fix". It just moves the landmine that is
> about to explode a bit further to the left for the next person to step on.
> The moment someone tries to run these input handlers in parallel all of
> this will blow up. It is a workaround for now but how will we get out of
> this in the future when the code runs in parallel up to the socket
> layer?

Moving the problem around is the only way to make any progress.

The bug with MP forwarding I try to solve is this one.
https://marc.info/?l=openbsd-tech&m=163857624429253&w=2

After 4 months of ideas that were denied by different people, I
came to this solution.  Put a mutex around PCB tables.  I think
this is necessary anyway if we want to reach parallel protocol
processing.  Unfortunately I missed 3 of 4 places where I hold the
mutex too long.  I am trying to fix the last 2 of them.

I do not want to delay parallel forwaring until parallel protocol
layer is finished.  Then neither will happen.  If someone is working
on parallel protocols, this code will blow up due to NET_ASSERT_WLOCKED().
It has to be fixed then.  My change is delaying work to make progress
elsewhere.  We cannot solve everything in a big commit.

Do you have a better idea?

bluhm

> > Index: netinet/raw_ip.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
> > retrieving revision 1.125
> > diff -u -p -r1.125 raw_ip.c
> > --- netinet/raw_ip.c        21 Mar 2022 09:12:34 -0000      1.125
> > +++ netinet/raw_ip.c        22 Mar 2022 12:59:05 -0000
> > @@ -122,9 +122,9 @@ rip_input(struct mbuf **mp, int *offp, i
> >  {
> >     struct mbuf *m = *mp;
> >     struct ip *ip = mtod(m, struct ip *);
> > -   struct inpcb *inp, *last = NULL;
> > +   struct inpcb *inp;
> > +   SIMPLEQ_HEAD(, inpcb) inpcblist;
> >     struct in_addr *key;
> > -   struct mbuf *opts = NULL;
> >     struct counters_ref ref;
> >     uint64_t *counters;
> >  
> > @@ -150,7 +150,8 @@ rip_input(struct mbuf **mp, int *offp, i
> >             }
> >     }
> >  #endif
> > -   NET_ASSERT_LOCKED();
> > +   NET_ASSERT_WLOCKED();
> > +   SIMPLEQ_INIT(&inpcblist);
> >     mtx_enter(&rawcbtable.inpt_mtx);
> >     TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
> >             if (inp->inp_socket->so_state & SS_CANTRCVMORE)
> > @@ -171,41 +172,16 @@ rip_input(struct mbuf **mp, int *offp, i
> >             if (inp->inp_faddr.s_addr &&
> >                 inp->inp_faddr.s_addr != ip->ip_src.s_addr)
> >                     continue;
> > -           if (last) {
> > -                   struct mbuf *n;
> >  
> > -                   if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
> > -                           if (last->inp_flags & INP_CONTROLOPTS ||
> > -                               last->inp_socket->so_options & SO_TIMESTAMP)
> > -                                   ip_savecontrol(last, &opts, ip, n);
> > -                           if (sbappendaddr(last->inp_socket,
> > -                               &last->inp_socket->so_rcv,
> > -                               sintosa(&ripsrc), n, opts) == 0) {
> > -                                   /* should notify about lost packet */
> > -                                   m_freem(n);
> > -                                   m_freem(opts);
> > -                           } else
> > -                                   sorwakeup(last->inp_socket);
> > -                           opts = NULL;
> > -                   }
> > -           }
> > -           last = inp;
> > +           in_pcbref(inp);
> > +           SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
> >     }
> >     mtx_leave(&rawcbtable.inpt_mtx);
> >  
> > -   if (last) {
> > -           if (last->inp_flags & INP_CONTROLOPTS ||
> > -               last->inp_socket->so_options & SO_TIMESTAMP)
> > -                   ip_savecontrol(last, &opts, ip, m);
> > -           if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv,
> > -               sintosa(&ripsrc), m, opts) == 0) {
> > -                   m_freem(m);
> > -                   m_freem(opts);
> > -           } else
> > -                   sorwakeup(last->inp_socket);
> > -   } else {
> > +   if (SIMPLEQ_EMPTY(&inpcblist)) {
> >             if (ip->ip_p != IPPROTO_ICMP)
> > -                   icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 
> > 0);
> > +                   icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
> > +                       0, 0);
> >             else
> >                     m_freem(m);
> >  
> > @@ -213,6 +189,30 @@ rip_input(struct mbuf **mp, int *offp, i
> >             counters[ips_noproto]++;
> >             counters[ips_delivered]--;
> >             counters_leave(&ref, ipcounters);
> > +   }
> > +
> > +   while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
> > +           struct mbuf *n, *opts = NULL;
> > +
> > +           SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
> > +           if (SIMPLEQ_EMPTY(&inpcblist))
> > +                   n = m;
> > +           else
> > +                   n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> > +           if (n != NULL) {
> > +                   if (inp->inp_flags & INP_CONTROLOPTS ||
> > +                       inp->inp_socket->so_options & SO_TIMESTAMP)
> > +                           ip_savecontrol(inp, &opts, ip, n);
> > +                   if (sbappendaddr(inp->inp_socket,
> > +                       &inp->inp_socket->so_rcv,
> > +                       sintosa(&ripsrc), n, opts) == 0) {
> > +                           /* should notify about lost packet */
> > +                           m_freem(n);
> > +                           m_freem(opts);
> > +                   } else
> > +                           sorwakeup(inp->inp_socket);
> > +           }
> > +           in_pcbunref(inp);
> >     }
> >     return IPPROTO_DONE;
> >  }
> > Index: netinet6/raw_ip6.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
> > retrieving revision 1.145
> > diff -u -p -r1.145 raw_ip6.c
> > --- netinet6/raw_ip6.c      21 Mar 2022 09:12:34 -0000      1.145
> > +++ netinet6/raw_ip6.c      22 Mar 2022 12:59:05 -0000
> > @@ -121,10 +121,9 @@ rip6_input(struct mbuf **mp, int *offp, 
> >     struct mbuf *m = *mp;
> >     struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
> >     struct inpcb *in6p;
> > -   struct inpcb *last = NULL;
> > +   SIMPLEQ_HEAD(, inpcb) inpcblist;
> >     struct in6_addr *key;
> >     struct sockaddr_in6 rip6src;
> > -   struct mbuf *opts = NULL;
> >  
> >     KASSERT(af == AF_INET6);
> >  
> > @@ -156,7 +155,8 @@ rip6_input(struct mbuf **mp, int *offp, 
> >             }
> >     }
> >  #endif
> > -   NET_ASSERT_LOCKED();
> > +   NET_ASSERT_WLOCKED();
> > +   SIMPLEQ_INIT(&inpcblist);
> >     mtx_enter(&rawin6pcbtable.inpt_mtx);
> >     TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) {
> >             if (in6p->inp_socket->so_state & SS_CANTRCVMORE)
> > @@ -183,7 +183,7 @@ rip6_input(struct mbuf **mp, int *offp, 
> >                         sizeof(*icmp6));
> >                     if (icmp6 == NULL) {
> >                             mtx_leave(&rawin6pcbtable.inpt_mtx);
> > -                           return IPPROTO_DONE;
> > +                           goto bad;
> >                     }
> >                     if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
> >                         in6p->inp_icmp6filt))
> > @@ -206,42 +206,13 @@ rip6_input(struct mbuf **mp, int *offp, 
> >                             continue;
> >                     }
> >             }
> > -           if (last) {
> > -                   struct  mbuf *n;
> > -                   if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
> > -                           if (last->inp_flags & IN6P_CONTROLOPTS)
> > -                                   ip6_savecontrol(last, n, &opts);
> > -                           /* strip intermediate headers */
> > -                           m_adj(n, *offp);
> > -                           if (sbappendaddr(last->inp_socket,
> > -                               &last->inp_socket->so_rcv,
> > -                               sin6tosa(&rip6src), n, opts) == 0) {
> > -                                   /* should notify about lost packet */
> > -                                   m_freem(n);
> > -                                   m_freem(opts);
> > -                                   rip6stat_inc(rip6s_fullsock);
> > -                           } else
> > -                                   sorwakeup(last->inp_socket);
> > -                           opts = NULL;
> > -                   }
> > -           }
> > -           last = in6p;
> > +
> > +           in_pcbref(in6p);
> > +           SIMPLEQ_INSERT_TAIL(&inpcblist, in6p, inp_notify);
> >     }
> >     mtx_leave(&rawin6pcbtable.inpt_mtx);
> >  
> > -   if (last) {
> > -           if (last->inp_flags & IN6P_CONTROLOPTS)
> > -                   ip6_savecontrol(last, m, &opts);
> > -           /* strip intermediate headers */
> > -           m_adj(m, *offp);
> > -           if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv,
> > -               sin6tosa(&rip6src), m, opts) == 0) {
> > -                   m_freem(m);
> > -                   m_freem(opts);
> > -                   rip6stat_inc(rip6s_fullsock);
> > -           } else
> > -                   sorwakeup(last->inp_socket);
> > -   } else {
> > +   if (SIMPLEQ_EMPTY(&inpcblist)) {
> >             struct counters_ref ref;
> >             uint64_t *counters;
> >  
> > @@ -261,6 +232,39 @@ rip6_input(struct mbuf **mp, int *offp, 
> >             counters = counters_enter(&ref, ip6counters);
> >             counters[ip6s_delivered]--;
> >             counters_leave(&ref, ip6counters);
> > +   }
> > +
> > +   while ((in6p = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
> > +           struct mbuf *n, *opts = NULL;
> > +
> > +           SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
> > +           if (SIMPLEQ_EMPTY(&inpcblist))
> > +                   n = m;
> > +           else
> > +                   n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> > +           if (n != NULL) {
> > +                   if (in6p->inp_flags & IN6P_CONTROLOPTS)
> > +                           ip6_savecontrol(in6p, n, &opts);
> > +                   /* strip intermediate headers */
> > +                   m_adj(n, *offp);
> > +                   if (sbappendaddr(in6p->inp_socket,
> > +                       &in6p->inp_socket->so_rcv,
> > +                       sin6tosa(&rip6src), n, opts) == 0) {
> > +                           /* should notify about lost packet */
> > +                           m_freem(n);
> > +                           m_freem(opts);
> > +                           rip6stat_inc(rip6s_fullsock);
> > +                   } else
> > +                           sorwakeup(in6p->inp_socket);
> > +           }
> > +           in_pcbunref(in6p);
> > +   }
> > +   return IPPROTO_DONE;
> > +
> > + bad:
> > +   while ((in6p = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
> > +           SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
> > +           in_pcbunref(in6p);
> >     }
> >     return IPPROTO_DONE;
> >  }
> > 
> 
> -- 
> :wq Claudio

Reply via email to