Hello,
</snip>
thanks for great explanation of the problem. I like your approach.
I have just two small nitpicks/questions see below.
>
> Index: net/if_loop.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
> retrieving revision 1.81
> diff -u -p -r1.81 if_loop.c
> --- net/if_loop.c 19 Apr 2017 15:21:54 -0000 1.81
> +++ net/if_loop.c 16 Oct 2017 13:51:03 -0000
> @@ -143,6 +143,7 @@
> int loioctl(struct ifnet *, u_long, caddr_t);
> void loopattach(int);
> void lortrequest(struct ifnet *, int, struct rtentry *);
> +int loinput(struct ifnet *, struct mbuf *, void *);
> int looutput(struct ifnet *,
> struct mbuf *, struct sockaddr *, struct rtentry *);
>
> @@ -191,6 +192,7 @@ loop_clone_create(struct if_clone *ifc,
> #if NBPFILTER > 0
> bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
> #endif
> + if_ih_insert(ifp, loinput, NULL);
I think you also want to add if_ih_remove() to loop_clone_destroy().
--------8<---------------8<---------------8<------------------8<--------
199 int
200 loop_clone_destroy(struct ifnet *ifp)
201 {
202 if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
203 return (EPERM);
204
205 if_detach(ifp);
206 if_ih_remove(ifp, loinput, NULL);
207
208 free(ifp, M_DEVBUF, sizeof(*ifp));
209 return (0);
210 }
211
--------8<---------------8<---------------8<------------------8<--------
I feel it should be called after if_detach().
</snip>
>
> @@ -1168,9 +1174,6 @@ icmp6_reflect(struct mbuf *m, size_t off
> * Note that only echo and node information replies are affected,
> * since the length of ICMP6 errors is limited to the minimum MTU.
> */
> -#if NPF > 0
> - pf_pkt_addr_changed(m);
> -#endif
> ip6_send(m);
> return;
Also I would like to kindly ask you to fix up my mess, when you are there.
the comment above should be moved to ip6_send_dispatch():
--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index 4e588d83394..bc19e5d8eb6 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -1481,6 +1481,13 @@ ip6_send_dispatch(void *xmq)
#endif /* IPSEC */
while ((m = ml_dequeue(&ml)) != NULL) {
+ /*
+ * To avoid a "too big" situation at an intermediate router and
+ * the path MTU discovery process, specify the IPV6_MINMTU
+ * flag. Note that only echo and node information replies are
+ * affected, since the length of ICMP6 errors is limited to the
+ * minimum MTU.
+ */
ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
}
NET_UNLOCK();
--------8<---------------8<---------------8<------------------8<--------
I've forgot to move the comment, when turning ip*_send() functions to tasks
long time ago:
--------8<---------------8<---------------8<------------------8<--------
revision 1.152
date: 2015/12/03 21:11:54; author: sashan; state: Exp; lines: +36 -1;\
commitid: nhuzteWvIf6uiITt;
ip_send()/ip6_send() allow PF to send response packet in ipsoftnet task.
this avoids current recursion to pf_test() function. the change also
switches icmp_error()/icmp6_error() to use ip_send()/ip6_send() so
they are safe for PF.
The idea comes from Markus Friedl. bluhm, mikeb and mpi helped me
a lot to get it into shape.
--------8<---------------8<---------------8<------------------8<--------
thanks a lot in advance, for taking care about it.
The updated patch with two changes discussed above is below.
thanks and
regards
sasha
--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c
index 47f88306699..e024aca74f4 100644
--- a/sys/net/if_loop.c
+++ b/sys/net/if_loop.c
@@ -143,6 +143,7 @@
int loioctl(struct ifnet *, u_long, caddr_t);
void loopattach(int);
void lortrequest(struct ifnet *, int, struct rtentry *);
+int loinput(struct ifnet *, struct mbuf *, void *);
int looutput(struct ifnet *,
struct mbuf *, struct sockaddr *, struct rtentry *);
@@ -191,6 +192,7 @@ loop_clone_create(struct if_clone *ifc, int unit)
#if NBPFILTER > 0
bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
+ if_ih_insert(ifp, loinput, NULL);
return (0);
}
@@ -201,17 +203,33 @@ loop_clone_destroy(struct ifnet *ifp)
return (EPERM);
if_detach(ifp);
+ if_ih_remove(ifp, loinput, NULL);
free(ifp, M_DEVBUF, sizeof(*ifp));
return (0);
}
+int
+loinput(struct ifnet *ifp, struct mbuf *m, void *cookie)
+{
+ int error;
+
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("%s: no header mbuf", __func__);
+
+ error = if_input_local(ifp, m, m->m_pkthdr.ph_family);
+ if (error)
+ ifp->if_ierrors++;
+
+ return (1);
+}
+
int
looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
struct rtentry *rt)
{
if ((m->m_flags & M_PKTHDR) == 0)
- panic("looutput: no header mbuf");
+ panic("%s: no header mbuf", __func__);
if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
m_freem(m);
@@ -219,7 +237,16 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct
sockaddr *dst,
rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
}
- return (if_input_local(ifp, m, dst->sa_family));
+ /* Use the quick path only once to avoid stack overflow. */
+ if ((m->m_flags & M_LOOP) == 0)
+ return (if_input_local(ifp, m, dst->sa_family));
+
+ m->m_pkthdr.ph_family = dst->sa_family;
+ if (mq_enqueue(&ifp->if_inputqueue, m))
+ return ENOBUFS;
+ task_add(softnettq, ifp->if_inputtask);
+
+ return (0);
}
void
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index 954f4219769..3f6c8b83aa0 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -714,10 +714,13 @@ icmp_reflect(struct mbuf *m, struct mbuf **op, struct
in_ifaddr *ia)
return (EHOSTUNREACH);
}
-#if NPF > 0
- pf_pkt_addr_changed(m);
-#endif
+ if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
+ m_freem(m);
+ return (ELOOP);
+ }
rtableid = m->m_pkthdr.ph_rtableid;
+ m_resethdr(m);
+ m->m_pkthdr.ph_rtableid = rtableid;
/*
* If the incoming packet was addressed directly to us,
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 3becc223313..a43c11010e4 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -1044,6 +1044,7 @@ icmp6_reflect(struct mbuf *m, size_t off)
struct icmp6_hdr *icmp6;
struct in6_addr t, *src = NULL;
struct sockaddr_in6 sa6_src, sa6_dst;
+ u_int rtableid;
CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) <= MHLEN);
@@ -1056,6 +1057,12 @@ icmp6_reflect(struct mbuf *m, size_t off)
goto bad;
}
+ if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP)
+ goto bad;
+ rtableid = m->m_pkthdr.ph_rtableid;
+ m_resethdr(m);
+ m->m_pkthdr.ph_rtableid = rtableid;
+
/*
* If there are extra headers between IPv6 and ICMPv6, strip
* off that header first.
@@ -1114,7 +1121,7 @@ icmp6_reflect(struct mbuf *m, size_t off)
* but is possible (for example) when we encounter an error while
* forwarding procedure destined to a duplicated address of ours.
*/
- rt = rtalloc(sin6tosa(&sa6_dst), 0, m->m_pkthdr.ph_rtableid);
+ rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid);
if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
!ISSET(ifatoia6(rt->rt_ifa)->ia6_flags,
IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) {
@@ -1129,8 +1136,7 @@ icmp6_reflect(struct mbuf *m, size_t off)
* that we do not own. Select a source address based on the
* source address of the erroneous packet.
*/
- rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE,
- m->m_pkthdr.ph_rtableid);
+ rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE, rtableid);
if (!rtisvalid(rt)) {
char addr[INET6_ADDRSTRLEN];
@@ -1162,15 +1168,6 @@ icmp6_reflect(struct mbuf *m, size_t off)
m->m_flags &= ~(M_BCAST|M_MCAST);
- /*
- * To avoid a "too big" situation at an intermediate router
- * and the path MTU discovery process, specify the IPV6_MINMTU flag.
- * Note that only echo and node information replies are affected,
- * since the length of ICMP6 errors is limited to the minimum MTU.
- */
-#if NPF > 0
- pf_pkt_addr_changed(m);
-#endif
ip6_send(m);
return;
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index 4e588d83394..bc19e5d8eb6 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -1481,6 +1481,13 @@ ip6_send_dispatch(void *xmq)
#endif /* IPSEC */
while ((m = ml_dequeue(&ml)) != NULL) {
+ /*
+ * To avoid a "too big" situation at an intermediate router and
+ * the path MTU discovery process, specify the IPV6_MINMTU
+ * flag. Note that only echo and node information replies are
+ * affected, since the length of ICMP6 errors is limited to the
+ * minimum MTU.
+ */
ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
}
NET_UNLOCK();
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 422f49f1b65..9141303782c 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -134,6 +134,7 @@ struct pkthdr {
u_int ph_rtableid; /* routing table id */
u_int ph_ifidx; /* rcv interface index */
u_int8_t ph_loopcnt; /* mbuf is looping in kernel */
+ u_int8_t ph_family; /* af, used when queueing */
struct pkthdr_pf pf;
};