On Thu, Jul 14, 2022 at 04:56:03PM +0200, Alexander Bluhm wrote:
> On Wed, Jul 13, 2022 at 12:49:53PM +0300, Vitaliy Makkoveev wrote:
> > Use per-session `pxs_mtx' mutex(9) to protect session context. Except
> > MPPE encryption, PPPOE sessions are mostly immutable, so no lock
> > required for that case.
> >
> > Global pipex(4) data is already protected by `pipex_list_mtx', so
> > pipex(4) doesn't rely on netlock anymore. Netlock could be also removed
> > from pppx(4)/pppac(4) layer with next diffs.
> >
> > This diff doesn't contain (*if_output)() magic.
> >
> > PPPOE/L2TP cases were additionally tested by Hrvoje Popovski.
>
> I don't like that pipex_ppp_input() deducts from the protocoll that
> the mutex is held and should be released.
>
> Passing down the locked variable from where it is taken may be a
> bit more maintainable.
>
> But I think it would be better to restructure the code to avoid
> lock releases in functions that did not take them. Could refactoring
> help to make the places where we lock more obvious?
>
> bluhm
>
I removed recursion from pipex_mppe_{input,output}(). This allows to
push session lock deeper.
Now session lock could be released just after we passed 'not_ours'
checks within pipex_common_input(), so pipex_ppp_input() input called
lockless. I added `locked' arg to pipex_common_input() to determine
session lock state.
I see no reason to move 'not_ours' checks outside pipex_common_input()
because this introduce code duplication. Also I see no reason to take
session lock after pipex_ppp_input() call, because it will be released
by caller just after successful return. At least fdrelease() has the
same behavior.
In output path, only pipex_pptp_output() requires to hold session lock
before call.
Index: sys/net/if_pppx.c
===================================================================
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.118
diff -u -p -r1.118 if_pppx.c
--- sys/net/if_pppx.c 2 Jul 2022 08:50:42 -0000 1.118
+++ sys/net/if_pppx.c 15 Jul 2022 10:51:07 -0000
@@ -637,8 +637,6 @@ pppx_add_session(struct pppx_dev *pxd, s
ifp->if_softc = pxi;
/* ifp->if_rdomain = req->pr_rdomain; */
if_counters_alloc(ifp);
- /* XXXSMP: be sure pppx_if_qstart() called with NET_LOCK held */
- ifq_set_maxlen(&ifp->if_snd, 1);
/* XXXSMP breaks atomicity */
NET_UNLOCK();
@@ -779,7 +777,6 @@ pppx_if_qstart(struct ifqueue *ifq)
struct mbuf *m;
int proto;
- NET_ASSERT_LOCKED();
while ((m = ifq_dequeue(ifq)) != NULL) {
proto = *mtod(m, int *);
m_adj(m, sizeof(proto));
@@ -1022,8 +1019,6 @@ pppacopen(dev_t dev, int flags, int mode
ifp->if_output = pppac_output;
ifp->if_qstart = pppac_qstart;
ifp->if_ioctl = pppac_ioctl;
- /* XXXSMP: be sure pppac_qstart() called with NET_LOCK held */
- ifq_set_maxlen(&ifp->if_snd, 1);
if_counters_alloc(ifp);
if_attach(ifp);
@@ -1398,7 +1393,6 @@ pppac_qstart(struct ifqueue *ifq)
struct ip ip;
int rv;
- NET_ASSERT_LOCKED();
while ((m = ifq_dequeue(ifq)) != NULL) {
#if NBPFILTER > 0
if (ifp->if_bpf) {
Index: sys/net/pipex.c
===================================================================
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.145
diff -u -p -r1.145 pipex.c
--- sys/net/pipex.c 12 Jul 2022 08:58:53 -0000 1.145
+++ sys/net/pipex.c 15 Jul 2022 10:51:07 -0000
@@ -90,7 +90,6 @@ struct pool mppe_key_pool;
* Locks used to protect global data
* A atomic operation
* I immutable after creation
- * N net lock
* L pipex_list_mtx
*/
@@ -171,7 +170,6 @@ pipex_ioctl(void *ownersc, u_long cmd, c
{
int ret = 0;
- NET_ASSERT_LOCKED();
switch (cmd) {
case PIPEXGSTAT:
ret = pipex_get_stat((struct pipex_session_stat_req *)data,
@@ -567,8 +565,6 @@ pipex_get_stat(struct pipex_session_stat
struct pipex_session *session;
int error = 0;
- NET_ASSERT_LOCKED();
-
session = pipex_lookup_by_session_id(req->psr_protocol,
req->psr_session_id);
if (session == NULL)
@@ -849,13 +845,14 @@ pipex_ppp_output(struct mbuf *m0, struct
{
u_char *cp, hdr[16];
- NET_ASSERT_LOCKED();
-
#ifdef PIPEX_MPPE
if (pipex_session_is_mppe_enabled(session)) {
if (proto == PPP_IP) {
- pipex_mppe_output(m0, session, PPP_IP);
- return;
+ m0 = pipex_mppe_output(m0, session, PPP_IP);
+ if (m0 == NULL)
+ goto drop;
+
+ proto = PPP_COMP;
}
}
#endif /* PIPEX_MPPE */
@@ -880,7 +877,9 @@ pipex_ppp_output(struct mbuf *m0, struct
#endif
#ifdef PIPEX_PPTP
case PIPEX_PROTO_PPTP:
+ mtx_enter(&session->pxs_mtx);
pipex_pptp_output(m0, session, 1, 1);
+ mtx_leave(&session->pxs_mtx);
break;
#endif
#ifdef PIPEX_L2TP
@@ -904,6 +903,10 @@ pipex_ppp_input(struct mbuf *m0, struct
int proto, hlen = 0;
struct mbuf *n;
+#ifdef PIPEX_MPPE
+again:
+#endif
+
KASSERT(m0->m_pkthdr.len >= PIPEX_PPPMINLEN);
proto = pipex_ppp_proto(m0, session, 0, &hlen);
#ifdef PIPEX_MPPE
@@ -915,8 +918,12 @@ pipex_ppp_input(struct mbuf *m0, struct
KASSERT(pipex_session_is_mppe_accepted(session));
m_adj(m0, hlen);
- pipex_mppe_input(m0, session);
- return;
+ m0 = pipex_mppe_input(m0, session);
+ if (m0 == NULL)
+ goto drop;
+ decrypted = 1;
+
+ goto again;
}
if (proto == PPP_CCP) {
if (decrypted)
@@ -977,6 +984,7 @@ pipex_ppp_input(struct mbuf *m0, struct
}
return;
+
drop:
m_freem(m0);
counters_inc(session->stat_counters, pxc_ierrors);
@@ -1109,14 +1117,17 @@ drop:
Static struct mbuf *
pipex_common_input(struct pipex_session *session, struct mbuf *m0, int hlen,
- int plen)
+ int plen, int locked)
{
int proto, ppphlen;
u_char code;
if ((m0->m_pkthdr.len < hlen + PIPEX_PPPMINLEN) ||
- (plen < PIPEX_PPPMINLEN))
+ (plen < PIPEX_PPPMINLEN)) {
+ if (locked)
+ mtx_leave(&session->pxs_mtx);
goto drop;
+ }
proto = pipex_ppp_proto(m0, session, hlen, &ppphlen);
switch (proto) {
@@ -1143,6 +1154,9 @@ pipex_common_input(struct pipex_session
goto not_ours;
}
+ if (locked)
+ mtx_leave(&session->pxs_mtx);
+
/* ok, The packet is for PIPEX */
m_adj(m0, hlen);/* cut off the tunnel protocol header */
@@ -1261,8 +1275,8 @@ pipex_pppoe_input(struct mbuf *m0, struc
sizeof(struct pipex_pppoe_header), &pppoe);
hlen = sizeof(struct ether_header) + sizeof(struct pipex_pppoe_header);
- if ((m0 = pipex_common_input(session, m0, hlen, ntohs(pppoe.length)))
- == NULL)
+ m0 = pipex_common_input(session, m0, hlen, ntohs(pppoe.length), 0);
+ if (m0 != NULL)
return (NULL);
m_freem(m0);
counters_inc(session->stat_counters, pxc_ierrors);
@@ -1327,6 +1341,8 @@ pipex_pptp_output(struct mbuf *m0, struc
struct ip *ip;
u_char *cp;
+ MUTEX_ASSERT_LOCKED(&session->pxs_mtx);
+
reqlen = PIPEX_IPGRE_HDRLEN + (has_seq + has_ack) * 4;
len = 0;
@@ -1480,7 +1496,6 @@ pipex_pptp_input(struct mbuf *m0, struct
struct pipex_pptp_session *pptp_session;
int rewind = 0;
- NET_ASSERT_LOCKED();
KASSERT(m0->m_pkthdr.len >= PIPEX_IPGRE_HDRLEN);
pptp_session = &session->proto.pptp;
@@ -1510,6 +1525,9 @@ pipex_pptp_input(struct mbuf *m0, struct
seqp = cp;
GETLONG(seq, cp);
}
+
+ mtx_enter(&session->pxs_mtx);
+
if (has_ack) {
ackp = cp;
GETLONG(ack, cp);
@@ -1553,16 +1571,30 @@ pipex_pptp_input(struct mbuf *m0, struct
pipex_pptp_output(NULL, session, 0, 1);
}
- if ((m0 = pipex_common_input(session, m0, hlen, ntohs(gre->len)))
- == NULL) {
- /* ok, The packet is for PIPEX */
- if (!rewind)
- session->proto.pptp.rcv_gap += nseq;
+ /*
+ * The following pipex_common_input() will release `pxs_mtx'
+ * deep within if the packet will be consumed. In the error
+ * path lock will be held all the time. So increment `rcv_gap'
+ * here, and on the error path back it out, no atomicity will
+ * be lost in all cases.
+ */
+ if (!rewind)
+ session->proto.pptp.rcv_gap += nseq;
+ m0 = pipex_common_input(session, m0, hlen, ntohs(gre->len), 1);
+ if (m0 == NULL) {
+ /*
+ * pipex_common_input() releases lock if the
+ * packet was consumed.
+ */
return (NULL);
}
if (rewind)
goto out_seq;
+ else {
+ /* The packet is not ours, back out `rcv_gap'. */
+ session->proto.pptp.rcv_gap -= nseq;
+ }
not_ours:
seq--; /* revert original seq value */
@@ -1591,6 +1623,8 @@ not_ours:
PUTLONG(ack, ackp);
}
+ mtx_leave(&session->pxs_mtx);
+
return (m0);
out_seq:
pipex_session_log(session, LOG_DEBUG,
@@ -1599,6 +1633,7 @@ out_seq:
pptp_session->rcv_nxt + pptp_session->maxwinsz,
ack, pptp_session->snd_una,
pptp_session->snd_nxt);
+ mtx_leave(&session->pxs_mtx);
/* FALLTHROUGH */
drop:
@@ -1728,6 +1763,8 @@ pipex_pptp_userland_output(struct mbuf *
gre = mtod(m0, struct pipex_gre_header *);
cp = PIPEX_SEEK_NEXTHDR(gre, sizeof(struct pipex_gre_header), u_char *);
+ mtx_enter(&session->pxs_mtx);
+
/*
* overwrite sequence numbers to adjust a gap between pipex and
* userland.
@@ -1748,6 +1785,8 @@ pipex_pptp_userland_output(struct mbuf *
session->proto.pptp.rcv_acked = val32;
}
+ mtx_leave(&session->pxs_mtx);
+
return (m0);
}
#endif /* PIPEX_PPTP */
@@ -1813,11 +1852,14 @@ pipex_l2tp_output(struct mbuf *m0, struc
if (pipex_session_is_l2tp_data_sequencing_on(session)) {
seq = (struct pipex_l2tp_seq_header *)(l2tp + 1);
l2tp->flagsver |= PIPEX_L2TP_FLAG_SEQUENCE;
+
+ mtx_enter(&session->pxs_mtx);
seq->ns = htons(session->proto.l2tp.ns_nxt);
session->proto.l2tp.ns_nxt++;
session->proto.l2tp.ns_gap++;
session->proto.l2tp.nr_acked = session->proto.l2tp.nr_nxt - 1;
seq->nr = htons(session->proto.l2tp.nr_acked);
+ mtx_leave(&session->pxs_mtx);
}
l2tp->flagsver = htons(l2tp->flagsver);
@@ -1844,14 +1886,19 @@ pipex_l2tp_output(struct mbuf *m0, struc
ip->ip_tos = 0;
ip->ip_off = 0;
+ mtx_enter(&session->pxs_mtx);
if (session->proto.l2tp.ipsecflowinfo > 0) {
if ((mtag = m_tag_get(PACKET_TAG_IPSEC_FLOWINFO,
- sizeof(u_int32_t), M_NOWAIT)) == NULL)
+ sizeof(u_int32_t), M_NOWAIT)) == NULL) {
+ mtx_leave(&session->pxs_mtx);
goto drop;
+ }
+
*(u_int32_t *)(mtag + 1) =
session->proto.l2tp.ipsecflowinfo;
m_tag_prepend(m0, mtag);
}
+ mtx_leave(&session->pxs_mtx);
ip_send(m0);
break;
@@ -1940,17 +1987,15 @@ pipex_l2tp_input(struct mbuf *m0, int of
uint32_t ipsecflowinfo)
{
struct pipex_l2tp_session *l2tp_session;
- int length, offset, hlen, nseq;
- u_char *cp, *nsp, *nrp;
+ int length = 0, offset = 0, hlen, nseq;
+ u_char *cp, *nsp = NULL, *nrp = NULL;
uint16_t flags, ns = 0, nr = 0;
int rewind = 0;
- NET_ASSERT_LOCKED();
+ mtx_enter(&session->pxs_mtx);
- length = offset = ns = nr = 0;
l2tp_session = &session->proto.l2tp;
l2tp_session->ipsecflowinfo = ipsecflowinfo;
- nsp = nrp = NULL;
m_copydata(m0, off0, sizeof(flags), &flags);
@@ -2008,15 +2053,31 @@ pipex_l2tp_input(struct mbuf *m0, int of
length -= hlen + offset;
hlen += off0 + offset;
- if ((m0 = pipex_common_input(session, m0, hlen, length)) == NULL) {
- /* ok, The packet is for PIPEX */
- if (!rewind)
- session->proto.l2tp.nr_gap += nseq;
+
+ /*
+ * The following pipex_common_input() will release `pxs_mtx'
+ * deep within if the packet will be consumed. In the error
+ * path lock will be held all the time. So increment `nr_gap'
+ * here, and on the error path back it out, no atomicity will
+ * be lost in all cases.
+ */
+ if (!rewind)
+ session->proto.l2tp.nr_gap += nseq;
+ m0 = pipex_common_input(session, m0, hlen, length, 1);
+ if (m0 == NULL) {
+ /*
+ * pipex_common_input() releases lock if the
+ * packet was consumed.
+ */
return (NULL);
}
if (rewind)
goto out_seq;
+ else {
+ /* The packet is not ours, backout `nr_gap'. */
+ session->proto.l2tp.nr_gap -= nseq;
+ }
/*
* overwrite sequence numbers to adjust a gap between pipex and
@@ -2042,15 +2103,18 @@ pipex_l2tp_input(struct mbuf *m0, int of
PUTSHORT(nr, nrp);
}
+ mtx_leave(&session->pxs_mtx);
+
return (m0);
out_seq:
pipex_session_log(session, LOG_DEBUG,
"Received bad data packet: out of sequence: seq=%u(%u-) "
"ack=%u(%u-%u)", ns, l2tp_session->nr_nxt, nr, l2tp_session->ns_una,
l2tp_session->ns_nxt);
-
/* FALLTHROUGH */
drop:
+ mtx_leave(&session->pxs_mtx);
+
m_freem(m0);
counters_inc(session->stat_counters, pxc_ierrors);
@@ -2175,6 +2239,8 @@ pipex_l2tp_userland_output(struct mbuf *
ns = ntohs(seq->ns);
nr = ntohs(seq->nr);
+ mtx_enter(&session->pxs_mtx);
+
ns += session->proto.l2tp.ns_gap;
seq->ns = htons(ns);
session->proto.l2tp.ns_nxt++;
@@ -2184,6 +2250,8 @@ pipex_l2tp_userland_output(struct mbuf *
if (SEQ16_GT(nr, session->proto.l2tp.nr_acked))
session->proto.l2tp.nr_acked = nr;
+ mtx_leave(&session->pxs_mtx);
+
return (m0);
}
#endif /* PIPEX_L2TP */
@@ -2342,7 +2410,7 @@ mppe_key_change(struct pipex_mppe *mppe)
}
}
-Static void
+struct mbuf *
pipex_mppe_input(struct mbuf *m0, struct pipex_session *session)
{
int pktloss, encrypt, flushed, m, n, len;
@@ -2443,7 +2511,7 @@ pipex_mppe_input(struct mbuf *m0, struct
/* Send CCP ResetReq */
PIPEX_DBG((session, LOG_DEBUG, "CCP SendResetReq"));
-
+
mtx_enter(&session->pxs_mtx);
ccp_id = session->ccp_id;
session->ccp_id++;
@@ -2483,15 +2551,13 @@ pipex_mppe_input(struct mbuf *m0, struct
if (m0->m_pkthdr.len < PIPEX_PPPMINLEN)
goto drop;
- pipex_ppp_input(m0, session, 1);
-
- return;
+ return (m0);
drop:
m_freem(m0);
- counters_inc(session->stat_counters, pxc_ierrors);
+ return (NULL);
}
-Static void
+struct mbuf *
pipex_mppe_output(struct mbuf *m0, struct pipex_session *session,
uint16_t protocol)
{
@@ -2515,7 +2581,7 @@ pipex_mppe_output(struct mbuf *m0, struc
m = m_dup_pkt(m0, max_linkhdr, M_NOWAIT);
m_freem(m0);
if (m == NULL)
- goto drop;
+ return (NULL);
m0 = m;
break;
}
@@ -2523,7 +2589,7 @@ pipex_mppe_output(struct mbuf *m0, struc
/* prepend mppe header */
M_PREPEND(m0, sizeof(struct mppe_header), M_NOWAIT);
if (m0 == NULL)
- goto drop;
+ return (NULL);
hdr = mtod(m0, struct mppe_header *);
hdr->protocol = protocol;
@@ -2577,11 +2643,7 @@ pipex_mppe_output(struct mbuf *m0, struc
mtx_leave(&mppe->pxm_mtx);
- pipex_ppp_output(m0, session, PPP_COMP);
-
- return;
-drop:
- counters_inc(session->stat_counters, pxc_oerrors);
+ return (m0);
}
Static void
Index: sys/net/pipex_local.h
===================================================================
RCS file: /cvs/src/sys/net/pipex_local.h,v
retrieving revision 1.48
diff -u -p -r1.48 pipex_local.h
--- sys/net/pipex_local.h 12 Jul 2022 08:58:53 -0000 1.48
+++ sys/net/pipex_local.h 15 Jul 2022 10:51:07 -0000
@@ -56,8 +56,8 @@ extern struct mutex pipex_list_mtx;
/*
* Locks used to protect struct members:
+ * A atomic operation
* I immutable after creation
- * N net lock
* L pipex_list_mtx
* s this pipex_session' `pxs_mtx'
* m this pipex_mppe' `pxm_mtx'
@@ -91,14 +91,14 @@ struct pipex_pppoe_session {
#ifdef PIPEX_PPTP
struct pipex_pptp_session {
/* sequence number gap between pipex and userland */
- int32_t snd_gap; /* [N] gap of our sequence */
- int32_t rcv_gap; /* [N] gap of peer's sequence */
- int32_t ul_snd_una; /* [N] userland send acked seq */
-
- uint32_t snd_nxt; /* [N] send next */
- uint32_t rcv_nxt; /* [N] receive next */
- uint32_t snd_una; /* [N] send acked sequence */
- uint32_t rcv_acked; /* [N] recv acked sequence */
+ int32_t snd_gap; /* [s] gap of our sequence */
+ int32_t rcv_gap; /* [s] gap of peer's sequence */
+ int32_t ul_snd_una; /* [s] userland send acked seq */
+
+ uint32_t snd_nxt; /* [s] send next */
+ uint32_t rcv_nxt; /* [s] receive next */
+ uint32_t snd_una; /* [s] send acked sequence */
+ uint32_t rcv_acked; /* [s] recv acked sequence */
int winsz; /* [I] windows size */
int maxwinsz; /* [I] max windows size */
@@ -141,16 +141,16 @@ struct pipex_l2tp_session {
uint32_t option_flags; /* [I] protocol options */
- int16_t ns_gap; /* [N] gap between userland and pipex */
- int16_t nr_gap; /* [N] gap between userland and pipex */
- uint16_t ul_ns_una; /* [N] unacked sequence number (userland) */
-
- uint16_t ns_nxt; /* [N] next sequence number to send */
- uint16_t ns_una; /* [N] unacked sequence number to send */
-
- uint16_t nr_nxt; /* [N] next sequence number to recv */
- uint16_t nr_acked; /* [N] acked sequence number to recv */
- uint32_t ipsecflowinfo; /* [N] IPsec SA flow id for NAT-T */
+ int16_t ns_gap; /* [s] gap between userland and pipex */
+ int16_t nr_gap; /* [s] gap between userland and pipex */
+ uint16_t ul_ns_una; /* [s] unacked sequence number (userland) */
+
+ uint16_t ns_nxt; /* [s] next sequence number to send */
+ uint16_t ns_una; /* [s] unacked sequence number to send */
+
+ uint16_t nr_nxt; /* [s] next sequence number to recv */
+ uint16_t nr_acked; /* [s] acked sequence number to recv */
+ uint32_t ipsecflowinfo; /* [s] IPsec SA flow id for NAT-T */
};
#endif /* PIPEX_L2TP */
@@ -197,7 +197,7 @@ struct pipex_session {
struct sockaddr_in6 ip6_address; /* [I] remote IPv6 address */
int ip6_prefixlen; /* [I] remote IPv6 prefixlen */
- u_int ifindex; /* [N] interface index */
+ u_int ifindex; /* [A] interface index */
void *ownersc; /* [I] owner context */
uint32_t ppp_flags; /* [I] configure flags */
@@ -429,7 +429,7 @@ void pipex_ip_input (st
void pipex_ip6_input (struct mbuf *, struct pipex_session *);
#endif
struct mbuf *pipex_common_input(struct pipex_session *,
- struct mbuf *, int, int);
+ struct mbuf *, int, int, int);
#ifdef PIPEX_PPPOE
void pipex_pppoe_output (struct mbuf *, struct pipex_session
*);
@@ -449,8 +449,10 @@ void pipex_mppe_init (s
void GetNewKeyFromSHA (u_char *, u_char *, int, u_char *);
void pipex_mppe_reduce_key (struct pipex_mppe *);
void mppe_key_change (struct pipex_mppe *);
-void pipex_mppe_input (struct mbuf *, struct pipex_session *);
-void pipex_mppe_output (struct mbuf *, struct pipex_session
*, uint16_t);
+struct mbuf *pipex_mppe_input (struct mbuf *,
+ struct pipex_session *);
+struct mbuf *pipex_mppe_output (struct mbuf *,
+ struct pipex_session *, uint16_t);
void pipex_ccp_input (struct mbuf *, struct pipex_session *);
int pipex_ccp_output (struct pipex_session *, int, int);
#endif