Hi,
markus@ has found a possible stack overrun in the network as we
have removed some queueing. Now lo(4) output calls the ip input
routines without a queue. So if a packet loops through the kernel,
the kernel stack fills up.
root@q70:.../~# route add 1.2.3.4 127.0.0.1
add host 1.2.3.4: gateway 127.0.0.1
root@q70:.../~# ping 1.2.3.4
PING 1.2.3.4 (1.2.3.4): 56 data bytes
kernel: double fault trap, code=0
Stopped at pf_test_state_icmp+0x177: callq pf_icmp_state_lookup+0x4
My main idea to fix this, is to uses the M_LOOP flag. This is set
when a packet goes through the loopback interface. I want to avoid
an additional queue if the packet goes to lo(4) only once. As there
may be gif(4), bridge(4), pair(4), ipsec(4), ... setups that
legitimately pass lo(4) more than once, I use the interface input
queue for these cases.
When pinging 127.0.0.1, the reflected packet still has the M_LOOP
flag set. When reusing a mbuf at the upper end of the network
stack, I think we should strip off the mbuf properties. It is a
new packet. The m_resethdr() function does that for us. For socket
splicing I have introduced ph_loopcnt to prevent looping at the
upper end of the stack. Although not necessary in icmp reflect, I
think it is a good idea to increase that here, too.
The ping 1.2.3.4 works now as it runs through ip forward. There the
TTL is decremented and the packet is finally dropped.
ok?
bluhm
Index: net/if_loop.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
retrieving revision 1.81
diff -u -p -r1.81 if_loop.c
--- net/if_loop.c 19 Apr 2017 15:21:54 -0000 1.81
+++ net/if_loop.c 16 Oct 2017 13:51:03 -0000
@@ -143,6 +143,7 @@
int loioctl(struct ifnet *, u_long, caddr_t);
void loopattach(int);
void lortrequest(struct ifnet *, int, struct rtentry *);
+int loinput(struct ifnet *, struct mbuf *, void *);
int looutput(struct ifnet *,
struct mbuf *, struct sockaddr *, struct rtentry *);
@@ -191,6 +192,7 @@ loop_clone_create(struct if_clone *ifc,
#if NBPFILTER > 0
bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
+ if_ih_insert(ifp, loinput, NULL);
return (0);
}
@@ -207,11 +209,26 @@ loop_clone_destroy(struct ifnet *ifp)
}
int
+loinput(struct ifnet *ifp, struct mbuf *m, void *cookie)
+{
+ int error;
+
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("%s: no header mbuf", __func__);
+
+ error = if_input_local(ifp, m, m->m_pkthdr.ph_family);
+ if (error)
+ ifp->if_ierrors++;
+
+ return (1);
+}
+
+int
looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
struct rtentry *rt)
{
if ((m->m_flags & M_PKTHDR) == 0)
- panic("looutput: no header mbuf");
+ panic("%s: no header mbuf", __func__);
if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
m_freem(m);
@@ -219,7 +236,16 @@ looutput(struct ifnet *ifp, struct mbuf
rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
}
- return (if_input_local(ifp, m, dst->sa_family));
+ /* Use the quick path only once to avoid stack overflow. */
+ if ((m->m_flags & M_LOOP) == 0)
+ return (if_input_local(ifp, m, dst->sa_family));
+
+ m->m_pkthdr.ph_family = dst->sa_family;
+ if (mq_enqueue(&ifp->if_inputqueue, m))
+ return ENOBUFS;
+ task_add(softnettq, ifp->if_inputtask);
+
+ return (0);
}
void
Index: netinet/ip_icmp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.172
diff -u -p -r1.172 ip_icmp.c
--- netinet/ip_icmp.c 9 Oct 2017 08:35:38 -0000 1.172
+++ netinet/ip_icmp.c 17 Oct 2017 17:40:38 -0000
@@ -714,10 +714,13 @@ icmp_reflect(struct mbuf *m, struct mbuf
return (EHOSTUNREACH);
}
-#if NPF > 0
- pf_pkt_addr_changed(m);
-#endif
+ if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
+ m_freem(m);
+ return (ELOOP);
+ }
rtableid = m->m_pkthdr.ph_rtableid;
+ m_resethdr(m);
+ m->m_pkthdr.ph_rtableid = rtableid;
/*
* If the incoming packet was addressed directly to us,
Index: netinet6/icmp6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/icmp6.c,v
retrieving revision 1.217
diff -u -p -r1.217 icmp6.c
--- netinet6/icmp6.c 9 Oct 2017 08:35:38 -0000 1.217
+++ netinet6/icmp6.c 17 Oct 2017 17:40:53 -0000
@@ -1044,6 +1044,7 @@ icmp6_reflect(struct mbuf *m, size_t off
struct icmp6_hdr *icmp6;
struct in6_addr t, *src = NULL;
struct sockaddr_in6 sa6_src, sa6_dst;
+ u_int rtableid;
CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) <= MHLEN);
@@ -1056,6 +1057,12 @@ icmp6_reflect(struct mbuf *m, size_t off
goto bad;
}
+ if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP)
+ goto bad;
+ rtableid = m->m_pkthdr.ph_rtableid;
+ m_resethdr(m);
+ m->m_pkthdr.ph_rtableid = rtableid;
+
/*
* If there are extra headers between IPv6 and ICMPv6, strip
* off that header first.
@@ -1114,7 +1121,7 @@ icmp6_reflect(struct mbuf *m, size_t off
* but is possible (for example) when we encounter an error while
* forwarding procedure destined to a duplicated address of ours.
*/
- rt = rtalloc(sin6tosa(&sa6_dst), 0, m->m_pkthdr.ph_rtableid);
+ rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid);
if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
!ISSET(ifatoia6(rt->rt_ifa)->ia6_flags,
IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) {
@@ -1129,8 +1136,7 @@ icmp6_reflect(struct mbuf *m, size_t off
* that we do not own. Select a source address based on the
* source address of the erroneous packet.
*/
- rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE,
- m->m_pkthdr.ph_rtableid);
+ rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE, rtableid);
if (!rtisvalid(rt)) {
char addr[INET6_ADDRSTRLEN];
@@ -1168,9 +1174,6 @@ icmp6_reflect(struct mbuf *m, size_t off
* Note that only echo and node information replies are affected,
* since the length of ICMP6 errors is limited to the minimum MTU.
*/
-#if NPF > 0
- pf_pkt_addr_changed(m);
-#endif
ip6_send(m);
return;
Index: sys/mbuf.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.231
diff -u -p -r1.231 mbuf.h
--- sys/mbuf.h 23 Jun 2017 11:18:12 -0000 1.231
+++ sys/mbuf.h 16 Oct 2017 13:47:12 -0000
@@ -134,6 +134,7 @@ struct pkthdr {
u_int ph_rtableid; /* routing table id */
u_int ph_ifidx; /* rcv interface index */
u_int8_t ph_loopcnt; /* mbuf is looping in kernel */
+ u_int8_t ph_family; /* af, used when queueing */
struct pkthdr_pf pf;
};