Hello,

</snip>

thanks for great explanation of the problem. I like your approach.

I have just two small nitpicks/questions see below.

> 
> Index: net/if_loop.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
> retrieving revision 1.81
> diff -u -p -r1.81 if_loop.c
> --- net/if_loop.c     19 Apr 2017 15:21:54 -0000      1.81
> +++ net/if_loop.c     16 Oct 2017 13:51:03 -0000
> @@ -143,6 +143,7 @@
>  int  loioctl(struct ifnet *, u_long, caddr_t);
>  void loopattach(int);
>  void lortrequest(struct ifnet *, int, struct rtentry *);
> +int  loinput(struct ifnet *, struct mbuf *, void *);
>  int  looutput(struct ifnet *,
>           struct mbuf *, struct sockaddr *, struct rtentry *);
>  
> @@ -191,6 +192,7 @@ loop_clone_create(struct if_clone *ifc, 
>  #if NBPFILTER > 0
>       bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
>  #endif
> +     if_ih_insert(ifp, loinput, NULL);

    I think you also want to add if_ih_remove() to loop_clone_destroy().

--------8<---------------8<---------------8<------------------8<--------
199 int
200 loop_clone_destroy(struct ifnet *ifp)
201 {
202         if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
203                 return (EPERM);
204 
205         if_detach(ifp);
206         if_ih_remove(ifp, loinput, NULL);
207 
208         free(ifp, M_DEVBUF, sizeof(*ifp));
209         return (0);
210 }
211 
--------8<---------------8<---------------8<------------------8<--------
    I feel it should be called after if_detach().

</snip>
>  
> @@ -1168,9 +1174,6 @@ icmp6_reflect(struct mbuf *m, size_t off
>        * Note that only echo and node information replies are affected,
>        * since the length of ICMP6 errors is limited to the minimum MTU.
>        */
> -#if NPF > 0
> -     pf_pkt_addr_changed(m);
> -#endif
>       ip6_send(m);
>       return;

    Also I would like to kindly ask you to fix up my mess, when you are there.
    the comment above should be moved to ip6_send_dispatch():

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index 4e588d83394..bc19e5d8eb6 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -1481,6 +1481,13 @@ ip6_send_dispatch(void *xmq)
 #endif /* IPSEC */
 
        while ((m = ml_dequeue(&ml)) != NULL) {
+               /*
+                * To avoid a "too big" situation at an intermediate router and
+                * the path MTU discovery process, specify the IPV6_MINMTU
+                * flag.  Note that only echo and node information replies are
+                * affected, since the length of ICMP6 errors is limited to the
+                * minimum MTU.
+                */
                ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
        }
        NET_UNLOCK();
--------8<---------------8<---------------8<------------------8<--------

    I've forgot to move the comment, when turning ip*_send() functions to tasks
    long time ago:
--------8<---------------8<---------------8<------------------8<--------
revision 1.152
date: 2015/12/03 21:11:54;  author: sashan;  state: Exp;  lines: +36 -1;\
  commitid: nhuzteWvIf6uiITt;
ip_send()/ip6_send() allow PF to send response packet in ipsoftnet task.
this avoids current recursion to pf_test() function. the change also
switches icmp_error()/icmp6_error() to use ip_send()/ip6_send() so
they are safe for PF.

The idea comes from Markus Friedl. bluhm, mikeb and mpi helped me
a lot to get it into shape.
--------8<---------------8<---------------8<------------------8<--------

    thanks a lot in advance, for taking care about it.

The updated patch with two changes discussed above is below.

thanks and
regards
sasha

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c
index 47f88306699..e024aca74f4 100644
--- a/sys/net/if_loop.c
+++ b/sys/net/if_loop.c
@@ -143,6 +143,7 @@
 int    loioctl(struct ifnet *, u_long, caddr_t);
 void   loopattach(int);
 void   lortrequest(struct ifnet *, int, struct rtentry *);
+int    loinput(struct ifnet *, struct mbuf *, void *);
 int    looutput(struct ifnet *,
            struct mbuf *, struct sockaddr *, struct rtentry *);
 
@@ -191,6 +192,7 @@ loop_clone_create(struct if_clone *ifc, int unit)
 #if NBPFILTER > 0
        bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
 #endif
+       if_ih_insert(ifp, loinput, NULL);
        return (0);
 }
 
@@ -201,17 +203,33 @@ loop_clone_destroy(struct ifnet *ifp)
                return (EPERM);
 
        if_detach(ifp);
+       if_ih_remove(ifp, loinput, NULL);
 
        free(ifp, M_DEVBUF, sizeof(*ifp));
        return (0);
 }
 
+int
+loinput(struct ifnet *ifp, struct mbuf *m, void *cookie)
+{
+       int error;
+
+       if ((m->m_flags & M_PKTHDR) == 0)
+               panic("%s: no header mbuf", __func__);
+
+       error = if_input_local(ifp, m, m->m_pkthdr.ph_family);
+       if (error)
+               ifp->if_ierrors++;
+
+       return (1);
+}
+
 int
 looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
     struct rtentry *rt)
 {
        if ((m->m_flags & M_PKTHDR) == 0)
-               panic("looutput: no header mbuf");
+               panic("%s: no header mbuf", __func__);
 
        if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
                m_freem(m);
@@ -219,7 +237,16 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct 
sockaddr *dst,
                        rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
        }
 
-       return (if_input_local(ifp, m, dst->sa_family));
+       /* Use the quick path only once to avoid stack overflow. */
+       if ((m->m_flags & M_LOOP) == 0)
+               return (if_input_local(ifp, m, dst->sa_family));
+
+       m->m_pkthdr.ph_family = dst->sa_family;
+       if (mq_enqueue(&ifp->if_inputqueue, m))
+               return ENOBUFS;
+       task_add(softnettq, ifp->if_inputtask);
+
+       return (0);
 }
 
 void
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index 954f4219769..3f6c8b83aa0 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -714,10 +714,13 @@ icmp_reflect(struct mbuf *m, struct mbuf **op, struct 
in_ifaddr *ia)
                return (EHOSTUNREACH);
        }
 
-#if NPF > 0
-       pf_pkt_addr_changed(m);
-#endif
+       if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
+               m_freem(m);
+               return (ELOOP);
+       }
        rtableid = m->m_pkthdr.ph_rtableid;
+       m_resethdr(m);
+       m->m_pkthdr.ph_rtableid = rtableid;
 
        /*
         * If the incoming packet was addressed directly to us,
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 3becc223313..a43c11010e4 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -1044,6 +1044,7 @@ icmp6_reflect(struct mbuf *m, size_t off)
        struct icmp6_hdr *icmp6;
        struct in6_addr t, *src = NULL;
        struct sockaddr_in6 sa6_src, sa6_dst;
+       u_int rtableid;
 
        CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) <= MHLEN);
 
@@ -1056,6 +1057,12 @@ icmp6_reflect(struct mbuf *m, size_t off)
                goto bad;
        }
 
+       if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP)
+               goto bad;
+       rtableid = m->m_pkthdr.ph_rtableid;
+       m_resethdr(m);
+       m->m_pkthdr.ph_rtableid = rtableid;
+
        /*
         * If there are extra headers between IPv6 and ICMPv6, strip
         * off that header first.
@@ -1114,7 +1121,7 @@ icmp6_reflect(struct mbuf *m, size_t off)
         * but is possible (for example) when we encounter an error while
         * forwarding procedure destined to a duplicated address of ours.
         */
-       rt = rtalloc(sin6tosa(&sa6_dst), 0, m->m_pkthdr.ph_rtableid);
+       rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid);
        if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
            !ISSET(ifatoia6(rt->rt_ifa)->ia6_flags,
            IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) {
@@ -1129,8 +1136,7 @@ icmp6_reflect(struct mbuf *m, size_t off)
                 * that we do not own.  Select a source address based on the
                 * source address of the erroneous packet.
                 */
-               rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE,
-                   m->m_pkthdr.ph_rtableid);
+               rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE, rtableid);
                if (!rtisvalid(rt)) {
                        char addr[INET6_ADDRSTRLEN];
 
@@ -1162,15 +1168,6 @@ icmp6_reflect(struct mbuf *m, size_t off)
 
        m->m_flags &= ~(M_BCAST|M_MCAST);
 
-       /*
-        * To avoid a "too big" situation at an intermediate router
-        * and the path MTU discovery process, specify the IPV6_MINMTU flag.
-        * Note that only echo and node information replies are affected,
-        * since the length of ICMP6 errors is limited to the minimum MTU.
-        */
-#if NPF > 0
-       pf_pkt_addr_changed(m);
-#endif
        ip6_send(m);
        return;
 
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index 4e588d83394..bc19e5d8eb6 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -1481,6 +1481,13 @@ ip6_send_dispatch(void *xmq)
 #endif /* IPSEC */
 
        while ((m = ml_dequeue(&ml)) != NULL) {
+               /*
+                * To avoid a "too big" situation at an intermediate router and
+                * the path MTU discovery process, specify the IPV6_MINMTU
+                * flag.  Note that only echo and node information replies are
+                * affected, since the length of ICMP6 errors is limited to the
+                * minimum MTU.
+                */
                ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
        }
        NET_UNLOCK();
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 422f49f1b65..9141303782c 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -134,6 +134,7 @@ struct      pkthdr {
        u_int                    ph_rtableid;   /* routing table id */
        u_int                    ph_ifidx;      /* rcv interface index */
        u_int8_t                 ph_loopcnt;    /* mbuf is looping in kernel */
+       u_int8_t                 ph_family;     /* af, used when queueing */
        struct pkthdr_pf         pf;
 };
 

Reply via email to