Please drop this diff.

Within rt_clone() we have the code section protected by kernel lock, and
this code section could be reached with mutex(9) held.

pipex(4) needs to be more refactored before introduce per-session
locking.

> On 28 Jun 2022, at 23:14, Vitaliy Makkoveev <[email protected]> wrote:
> 
> On Tue, Jun 28, 2022 at 09:05:09PM +0300, Vitaliy Makkoveev wrote:
>> The updated diff. It was triggered by Hrvoje Popovski, we could do
>> direct (*if_qstart)() call in pipex(4) PPPOE input path, and this
>> provides deadlock. The updated diff uses ip{,6}_send() instead of
>> ipv{4,6}_input().
>> 
> 
> I think, I found better solution. The diff below still uses
> ipv{4,6}_input(), but introduces custom pipex_if_enqueue()
> (*if_enqueue)() handler. Now we can be sure, our (*if_qstart)() handlers
> will never be called directly. Also we can be sure about netlock state
> within our (*if_qstart)() handlers and remove pipexintr().
> 
> We do direct (*if_qstart)() call only if we overflow `if_snd' queue, and
> of course we loose packets in this case, so the behavior mostly the
> same.
> 
> Index: sys/net/if_pppx.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.117
> diff -u -p -r1.117 if_pppx.c
> --- sys/net/if_pppx.c 26 Jun 2022 22:51:58 -0000      1.117
> +++ sys/net/if_pppx.c 28 Jun 2022 19:56:47 -0000
> @@ -651,6 +651,7 @@ pppx_add_session(struct pppx_dev *pxd, s
>       ifp->if_mtu = req->pr_peer_mru; /* XXX */
>       ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST | IFF_UP;
>       ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
> +     ifp->if_enqueue = pipex_if_enqueue;
>       ifp->if_qstart = pppx_if_qstart;
>       ifp->if_output = pppx_if_output;
>       ifp->if_ioctl = pppx_if_ioctl;
> @@ -659,8 +660,6 @@ pppx_add_session(struct pppx_dev *pxd, s
>       ifp->if_softc = pxi;
>       /* ifp->if_rdomain = req->pr_rdomain; */
>       if_counters_alloc(ifp);
> -     /* XXXSMP: be sure pppx_if_qstart() called with NET_LOCK held */
> -     ifq_set_maxlen(&ifp->if_snd, 1);
> 
>       /* XXXSMP breaks atomicity */
>       NET_UNLOCK();
> @@ -801,13 +800,16 @@ pppx_if_qstart(struct ifqueue *ifq)
>       struct mbuf *m;
>       int proto;
> 
> -     NET_ASSERT_LOCKED();
> +     mtx_enter(&pxi->pxi_session->pxs_mtx);
> +
>       while ((m = ifq_dequeue(ifq)) != NULL) {
>               proto = *mtod(m, int *);
>               m_adj(m, sizeof(proto));
> 
>               pipex_ppp_output(m, pxi->pxi_session, proto);
>       }
> +
> +     mtx_leave(&pxi->pxi_session->pxs_mtx);
> }
> 
> int
> @@ -1041,11 +1043,10 @@ pppacopen(dev_t dev, int flags, int mode
>       ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST;
>       ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
>       ifp->if_rtrequest = p2p_rtrequest; /* XXX */
> +     ifp->if_enqueue = pipex_if_enqueue;
>       ifp->if_output = pppac_output;
>       ifp->if_qstart = pppac_qstart;
>       ifp->if_ioctl = pppac_ioctl;
> -     /* XXXSMP: be sure pppac_qstart() called with NET_LOCK held */
> -     ifq_set_maxlen(&ifp->if_snd, 1);
> 
>       if_counters_alloc(ifp);
>       if_attach(ifp);
> @@ -1441,7 +1442,6 @@ pppac_qstart(struct ifqueue *ifq)
>       struct ip ip;
>       int rv;
> 
> -     NET_ASSERT_LOCKED();
>       while ((m = ifq_dequeue(ifq)) != NULL) {
> #if NBPFILTER > 0
>               if (ifp->if_bpf) {
> Index: sys/net/pipex.c
> ===================================================================
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.142
> diff -u -p -r1.142 pipex.c
> --- sys/net/pipex.c   28 Jun 2022 08:01:40 -0000      1.142
> +++ sys/net/pipex.c   28 Jun 2022 19:56:47 -0000
> @@ -91,7 +91,6 @@ struct pool mppe_key_pool;
>  * Locks used to protect global data
>  *       A       atomic operation
>  *       I       immutable after creation
> - *       N       net lock
>  *       L       pipex_list_mtx
>  */
> 
> @@ -172,7 +171,6 @@ pipex_ioctl(void *ownersc, u_long cmd, c
> {
>       int ret = 0;
> 
> -     NET_ASSERT_LOCKED();
>       switch (cmd) {
>       case PIPEXCSESSION:
>               ret = pipex_config_session(
> @@ -197,6 +195,19 @@ pipex_ioctl(void *ownersc, u_long cmd, c
>       return (ret);
> }
> 
> +int
> +pipex_if_enqueue(struct ifnet *ifp, struct mbuf *m)
> +{
> +     struct ifqueue *ifq = &ifp->if_snd;
> +     int error;
> +
> +     if ((error = ifq_enqueue(ifq, m)) != 0)
> +             return (error);
> +     task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
> +
> +     return (0);
> +}
> +
> /************************************************************************
>  * Software Interrupt Handler
>  ************************************************************************/
> @@ -575,18 +586,20 @@ pipex_config_session(struct pipex_sessio
>       struct pipex_session *session;
>       int error = 0;
> 
> -     NET_ASSERT_LOCKED();
> -
>       session = pipex_lookup_by_session_id(req->pcr_protocol,
>           req->pcr_session_id);
>       if (session == NULL)
>               return (EINVAL);
> 
>       if (session->ownersc == ownersc) {
> +             mtx_enter(&session->pxs_mtx);
> +
>               if (req->pcr_ip_forward != 0)
>                       session->flags |= PIPEX_SFLAGS_IP_FORWARD;
>               else
>                       session->flags &= ~PIPEX_SFLAGS_IP_FORWARD;
> +
> +             mtx_leave(&session->pxs_mtx);
>       } else
>               error = EINVAL;
> 
> @@ -601,8 +614,6 @@ pipex_get_stat(struct pipex_session_stat
>       struct pipex_session *session;
>       int error = 0;
> 
> -     NET_ASSERT_LOCKED();
> -
>       session = pipex_lookup_by_session_id(req->psr_protocol,
>           req->psr_session_id);
>       if (session == NULL)
> @@ -811,6 +822,9 @@ pipex_ip_output(struct mbuf *m0, struct 
>               /*
>                * Multicast packet is a idle packet and it's not TCP.
>                */
> +
> +             mtx_enter(&session->pxs_mtx);
> +
>               if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
>                   PIPEX_SFLAGS_IP6_FORWARD)) == 0)
>                       goto drop;
> @@ -837,6 +851,8 @@ pipex_ip_output(struct mbuf *m0, struct 
>               }
> 
>               pipex_ppp_output(m0, session, PPP_IP);
> +
> +             mtx_leave(&session->pxs_mtx);
>       } else {
>               struct pipex_session *session_tmp;
>               struct mbuf *m;
> @@ -846,16 +862,21 @@ pipex_ip_output(struct mbuf *m0, struct 
>               LIST_FOREACH(session_tmp, &pipex_session_list, session_list) {
>                       if (session_tmp->ownersc != session->ownersc)
>                               continue;
> +
> +                     mtx_enter(&session->pxs_mtx);
> +
>                       if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
>                           PIPEX_SFLAGS_IP6_FORWARD)) == 0)
> -                             continue;
> +                             goto next;
>                       m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
>                       if (m == NULL) {
>                               counters_inc(session->stat_counters,
>                                   pxc_oerrors);
> -                             continue;
> +                             goto next;
>                       }
>                       pipex_ppp_output(m, session_tmp, PPP_IP);
> +next:
> +                     mtx_leave(&session->pxs_mtx);
>               }
>               m_freem(m0);
>       }
> @@ -864,6 +885,7 @@ pipex_ip_output(struct mbuf *m0, struct 
> drop:
>       m_freem(m0);
> dropped:
> +     mtx_leave(&session->pxs_mtx);
>       counters_inc(session->stat_counters, pxc_oerrors);
> }
> 
> @@ -872,7 +894,7 @@ pipex_ppp_output(struct mbuf *m0, struct
> {
>       u_char *cp, hdr[16];
> 
> -     NET_ASSERT_LOCKED();
> +     MUTEX_ASSERT_LOCKED(&session->pxs_mtx);
> 
> #ifdef PIPEX_MPPE
>       if (pipex_session_is_mppe_enabled(session)) {
> @@ -927,6 +949,8 @@ pipex_ppp_input(struct mbuf *m0, struct 
>       int proto, hlen = 0;
>       struct mbuf *n;
> 
> +     MUTEX_ASSERT_LOCKED(&session->pxs_mtx);
> +
>       KASSERT(m0->m_pkthdr.len >= PIPEX_PPPMINLEN);
>       proto = pipex_ppp_proto(m0, session, 0, &hlen);
> #ifdef PIPEX_MPPE
> @@ -1288,11 +1312,16 @@ pipex_pppoe_input(struct mbuf *m0, struc
>           sizeof(struct pipex_pppoe_header), &pppoe);
> 
>       hlen = sizeof(struct ether_header) + sizeof(struct pipex_pppoe_header);
> -     if ((m0 = pipex_common_input(session, m0, hlen, ntohs(pppoe.length)))
> -         == NULL)
> -             return (NULL);
> -     m_freem(m0);
> -     counters_inc(session->stat_counters, pxc_ierrors);
> +
> +     mtx_enter(&session->pxs_mtx);
> +     m0 = pipex_common_input(session, m0, hlen, ntohs(pppoe.length));
> +     mtx_leave(&session->pxs_mtx);
> +
> +     if (m0 != NULL) {
> +             m_freem(m0);
> +             counters_inc(session->stat_counters, pxc_ierrors);
> +     }
> +
>       return (NULL);
> }
> 
> @@ -1354,6 +1383,8 @@ pipex_pptp_output(struct mbuf *m0, struc
>       struct ip *ip;
>       u_char *cp;
> 
> +     MUTEX_ASSERT_LOCKED(&session->pxs_mtx);
> +
>       reqlen = PIPEX_IPGRE_HDRLEN + (has_seq + has_ack) * 4;
> 
>       len = 0;
> @@ -1507,7 +1538,6 @@ pipex_pptp_input(struct mbuf *m0, struct
>       struct pipex_pptp_session *pptp_session;
>       int rewind = 0;
> 
> -     NET_ASSERT_LOCKED();
>       KASSERT(m0->m_pkthdr.len >= PIPEX_IPGRE_HDRLEN);
>       pptp_session = &session->proto.pptp;
> 
> @@ -1537,6 +1567,9 @@ pipex_pptp_input(struct mbuf *m0, struct
>               seqp = cp;
>               GETLONG(seq, cp);
>       }
> +
> +     mtx_enter(&session->pxs_mtx);
> +
>       if (has_ack) {
>               ackp = cp;
>               GETLONG(ack, cp);
> @@ -1585,6 +1618,7 @@ pipex_pptp_input(struct mbuf *m0, struct
>               /* ok,  The packet is for PIPEX */
>               if (!rewind)
>                       session->proto.pptp.rcv_gap += nseq;
> +             mtx_leave(&session->pxs_mtx);
>               return (NULL);
>       }
> 
> @@ -1618,6 +1652,8 @@ not_ours:
>               PUTLONG(ack, ackp);
>       }
> 
> +     mtx_leave(&session->pxs_mtx);
> +
>       return (m0);
> out_seq:
>       pipex_session_log(session, LOG_DEBUG,
> @@ -1626,6 +1662,7 @@ out_seq:
>           pptp_session->rcv_nxt + pptp_session->maxwinsz,
>           ack, pptp_session->snd_una,
>           pptp_session->snd_nxt);
> +     mtx_leave(&session->pxs_mtx);
> 
>       /* FALLTHROUGH */
> drop:
> @@ -1755,6 +1792,8 @@ pipex_pptp_userland_output(struct mbuf *
>       gre = mtod(m0, struct pipex_gre_header *);
>       cp = PIPEX_SEEK_NEXTHDR(gre, sizeof(struct pipex_gre_header), u_char *);
> 
> +     mtx_enter(&session->pxs_mtx);
> +
>       /*
>        * overwrite sequence numbers to adjust a gap between pipex and
>        * userland.
> @@ -1775,6 +1814,8 @@ pipex_pptp_userland_output(struct mbuf *
>                       session->proto.pptp.rcv_acked = val32;
>       }
> 
> +     mtx_leave(&session->pxs_mtx);
> +
>       return (m0);
> }
> #endif /* PIPEX_PPTP */
> @@ -1796,6 +1837,8 @@ pipex_l2tp_output(struct mbuf *m0, struc
> #endif
>       struct m_tag *mtag;
> 
> +     MUTEX_ASSERT_LOCKED(&session->pxs_mtx);
> +
>       hlen = sizeof(struct pipex_l2tp_header) +
>           ((pipex_session_is_l2tp_data_sequencing_on(session))
>                   ? sizeof(struct pipex_l2tp_seq_header) : 0) +
> @@ -1967,17 +2010,15 @@ pipex_l2tp_input(struct mbuf *m0, int of
>     uint32_t ipsecflowinfo)
> {
>       struct pipex_l2tp_session *l2tp_session;
> -     int length, offset, hlen, nseq;
> -     u_char *cp, *nsp, *nrp;
> +     int length = 0, offset = 0, hlen, nseq;
> +     u_char *cp, *nsp = NULL, *nrp = NULL;
>       uint16_t flags, ns = 0, nr = 0;
>       int rewind = 0;
> 
> -     NET_ASSERT_LOCKED();
> +     mtx_enter(&session->pxs_mtx);
> 
> -     length = offset = ns = nr = 0;
>       l2tp_session = &session->proto.l2tp;
>       l2tp_session->ipsecflowinfo = ipsecflowinfo;
> -     nsp = nrp = NULL;
> 
>       m_copydata(m0, off0, sizeof(flags), &flags);
> 
> @@ -2039,6 +2080,8 @@ pipex_l2tp_input(struct mbuf *m0, int of
>               /* ok,  The packet is for PIPEX */
>               if (!rewind)
>                       session->proto.l2tp.nr_gap += nseq;
> +             mtx_leave(&session->pxs_mtx);
> +
>               return (NULL);
>       }
> 
> @@ -2069,15 +2112,18 @@ pipex_l2tp_input(struct mbuf *m0, int of
>               PUTSHORT(nr, nrp);
>       }
> 
> +     mtx_leave(&session->pxs_mtx);
> +
>       return (m0);
> out_seq:
>       pipex_session_log(session, LOG_DEBUG,
>           "Received bad data packet: out of sequence: seq=%u(%u-) "
>           "ack=%u(%u-%u)", ns, l2tp_session->nr_nxt, nr, l2tp_session->ns_una,
>           l2tp_session->ns_nxt);
> -
>       /* FALLTHROUGH */
> drop:
> +     mtx_leave(&session->pxs_mtx);
> +
>       m_freem(m0);
>       counters_inc(session->stat_counters, pxc_ierrors);
> 
> @@ -2202,6 +2248,8 @@ pipex_l2tp_userland_output(struct mbuf *
>       ns = ntohs(seq->ns);
>       nr = ntohs(seq->nr);
> 
> +     mtx_enter(&session->pxs_mtx);
> +
>       ns += session->proto.l2tp.ns_gap;
>       seq->ns = htons(ns);
>       session->proto.l2tp.ns_nxt++;
> @@ -2211,6 +2259,8 @@ pipex_l2tp_userland_output(struct mbuf *
>       if (SEQ16_GT(nr, session->proto.l2tp.nr_acked))
>               session->proto.l2tp.nr_acked = nr;
> 
> +     mtx_leave(&session->pxs_mtx);
> +
>       return (m0);
> }
> #endif /* PIPEX_L2TP */
> @@ -2379,6 +2429,8 @@ pipex_mppe_input(struct mbuf *m0, struct
>       u_char *cp;
>       int rewind = 0;
> 
> +     MUTEX_ASSERT_LOCKED(&session->pxs_mtx);
> +
>       /* pullup */
>       PIPEX_PULLUP(m0, sizeof(coher_cnt));
>       if (m0 == NULL)
> @@ -2471,10 +2523,8 @@ pipex_mppe_input(struct mbuf *m0, struct
>                       /* Send CCP ResetReq */
>                       PIPEX_DBG((session, LOG_DEBUG, "CCP SendResetReq"));
> 
> -                     mtx_enter(&session->pxs_mtx);
>                       ccp_id = session->ccp_id;
>                       session->ccp_id++;
> -                     mtx_leave(&session->pxs_mtx);
> 
>                       pipex_ccp_output(session, CCP_RESETREQ, ccp_id);
>                       goto drop;
> Index: sys/net/pipex_local.h
> ===================================================================
> RCS file: /cvs/src/sys/net/pipex_local.h,v
> retrieving revision 1.47
> diff -u -p -r1.47 pipex_local.h
> --- sys/net/pipex_local.h     26 Jun 2022 15:50:21 -0000      1.47
> +++ sys/net/pipex_local.h     28 Jun 2022 19:56:47 -0000
> @@ -56,8 +56,8 @@ extern struct mutex pipex_list_mtx;
> 
> /*
>  * Locks used to protect struct members:
> + *      A       atomic operation
>  *      I       immutable after creation
> - *      N       net lock
>  *      L       pipex_list_mtx
>  *      s       this pipex_session' `pxs_mtx'
>  *      m       this pipex_mppe' `pxm_mtx'
> @@ -91,14 +91,14 @@ struct pipex_pppoe_session {
> #ifdef PIPEX_PPTP
> struct pipex_pptp_session {
>       /* sequence number gap between pipex and userland */
> -     int32_t snd_gap;                /* [N] gap of our sequence */
> -     int32_t rcv_gap;                /* [N] gap of peer's sequence */
> -     int32_t ul_snd_una;             /* [N] userland send acked seq */
> -
> -     uint32_t snd_nxt;               /* [N] send next */
> -     uint32_t rcv_nxt;               /* [N] receive next */
> -     uint32_t snd_una;               /* [N] send acked sequence */
> -     uint32_t rcv_acked;             /* [N] recv acked sequence */
> +     int32_t snd_gap;                /* [s] gap of our sequence */
> +     int32_t rcv_gap;                /* [s] gap of peer's sequence */
> +     int32_t ul_snd_una;             /* [s] userland send acked seq */
> +
> +     uint32_t snd_nxt;               /* [s] send next */
> +     uint32_t rcv_nxt;               /* [s] receive next */
> +     uint32_t snd_una;               /* [s] send acked sequence */
> +     uint32_t rcv_acked;             /* [s] recv acked sequence */
> 
>       int winsz;                      /* [I] windows size */
>       int maxwinsz;                   /* [I] max windows size */
> @@ -141,16 +141,16 @@ struct pipex_l2tp_session {
> 
>       uint32_t option_flags;          /* [I] protocol options */
> 
> -     int16_t ns_gap;         /* [N] gap between userland and pipex */
> -     int16_t nr_gap;         /* [N] gap between userland and pipex */
> -     uint16_t ul_ns_una;     /* [N] unacked sequence number (userland) */
> -
> -     uint16_t ns_nxt;        /* [N] next sequence number to send */
> -     uint16_t ns_una;        /* [N] unacked sequence number to send */
> -
> -     uint16_t nr_nxt;        /* [N] next sequence number to recv */
> -     uint16_t nr_acked;      /* [N] acked sequence number to recv */
> -     uint32_t ipsecflowinfo; /* [N] IPsec SA flow id for NAT-T */
> +     int16_t ns_gap;         /* [s] gap between userland and pipex */
> +     int16_t nr_gap;         /* [s] gap between userland and pipex */
> +     uint16_t ul_ns_una;     /* [s] unacked sequence number (userland) */
> +
> +     uint16_t ns_nxt;        /* [s] next sequence number to send */
> +     uint16_t ns_una;        /* [s] unacked sequence number to send */
> +
> +     uint16_t nr_nxt;        /* [s] next sequence number to recv */
> +     uint16_t nr_acked;      /* [s] acked sequence number to recv */
> +     uint32_t ipsecflowinfo; /* [s] IPsec SA flow id for NAT-T */
> };
> #endif /* PIPEX_L2TP */
> 
> @@ -180,9 +180,9 @@ struct pipex_session {
> 
>       uint32_t        idle_time;      /* [L] idle time in seconds */
> 
> -     u_int           flags;          /* [N] flags, see below */
> -#define PIPEX_SFLAGS_IP_FORWARD              0x01 /* [N] enable IP 
> forwarding */
> -#define PIPEX_SFLAGS_IP6_FORWARD     0x02 /* [N] enable IPv6 forwarding */
> +     u_int           flags;          /* [s] flags, see below */
> +#define PIPEX_SFLAGS_IP_FORWARD              0x01 /* [s] enable IP 
> forwarding */
> +#define PIPEX_SFLAGS_IP6_FORWARD     0x02 /* [s] enable IPv6 forwarding */
> #define PIPEX_SFLAGS_MULTICAST                0x04 /* [I] virtual entry for
>                                               multicast */
> #define PIPEX_SFLAGS_PPPX             0x08 /* [I] interface is
> @@ -200,7 +200,7 @@ struct pipex_session {
>       struct sockaddr_in6 ip6_address; /* [I] remote IPv6 address */
>       int             ip6_prefixlen;   /* [I] remote IPv6 prefixlen */
> 
> -     u_int           ifindex;                /* [N] interface index */
> +     u_int           ifindex;                /* [A] interface index */
>       void            *ownersc;               /* [I] owner context */
> 
>       uint32_t        ppp_flags;              /* [I] configure flags */
> @@ -469,3 +469,4 @@ int                   pipex_ppp_enqueue 
> void                  pipex_timer_start (void);
> void                  pipex_timer_stop (void);
> void                  pipex_timer (void *);
> +int                   pipex_if_enqueue(struct ifnet *, struct mbuf *);

Reply via email to