On Mon, Jul 20, 2015 at 01:09:07AM +0000, Florian Obser wrote:
> so pflow(4) shoving it's data with ip_output into the network stack
> seems wrong. this converts it to use sosend(9) and might even give us
> non-legacy IP support.

I think this is a good idea.  Comments inline.

bluhm

> tests from (heavy) pflow(4) users would be appriciated.

Sorry, I don't use pflow(4).

> diff --git if_pflow.c if_pflow.c
> index 4f3ac5e..624fdaf 100644
> --- if_pflow.c
> +++ if_pflow.c
> @@ -28,6 +28,8 @@
>  #include <sys/timeout.h>
>  #include <sys/ioctl.h>
>  #include <sys/kernel.h>
> +#include <sys/socket.h>
> +#include <sys/socketvar.h>
>  #include <sys/sysctl.h>
>  
>  #include <net/if.h>
> @@ -94,7 +96,6 @@ int pflow_pack_flow(struct pf_state *, struct pf_state_key 
> *,
>       struct pflow_softc *);
>  int  pflow_pack_flow_ipfix(struct pf_state *, struct pf_state_key *,
>       struct pflow_softc *);
> -int  pflow_get_dynport(void);
>  int  export_pflow_if(struct pf_state*, struct pf_state_key *,
>       struct pflow_softc *);
>  int  copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc);
> @@ -107,6 +108,8 @@ struct if_clone   pflow_cloner =
>      IF_CLONE_INITIALIZER("pflow", pflow_clone_create,
>      pflow_clone_destroy);
>  
> +extern struct proc proc0;
> +
>  void
>  pflowattach(int npflow)
>  {
> @@ -119,22 +122,51 @@ pflow_clone_create(struct if_clone *ifc, int unit)
>  {
>       struct ifnet            *ifp;
>       struct pflow_softc      *pflowif;
> +     struct socket           *so;
> +     struct sockaddr_in      *sin;
> +     struct mbuf             *m;
> +     int                      error;
> +
> +     error = 0;
> +     m = NULL;

Useless code, they are set a few lines below.

> +
> +     error = socreate(AF_INET, &so, SOCK_DGRAM, 0);
> +     if (error)
> +             return (error);
> +
> +     MGET(m, M_WAIT, MT_SONAME);
> +     sin = mtod(m, struct sockaddr_in *);

You have to initialialize struct sockaddr_in to 0 to clear the padding.

> +     sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
> +     sin->sin_family = AF_INET;
> +     sin->sin_addr.s_addr = INADDR_ANY;
> +     sin->sin_port = htons(0);
> +     error = sobind(so, m, &proc0);
> +     m_freem(m);
> +     if (error) {
> +             soclose(so);
> +             pflowif->so = NULL;

You have not set pflowif yet, so don't clear it.

> +             return (error);
> +     }
>  
>       if ((pflowif = malloc(sizeof(*pflowif),
> -         M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
> -             return (ENOMEM);
> -
> -     if ((pflowif->sc_imo.imo_membership = malloc(
> -         (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
> -         M_WAITOK|M_ZERO)) == NULL) {
> -             free(pflowif, M_DEVBUF, 0);
> +         M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) {
> +             soclose(so);
> +             pflowif->so = NULL;

You have not set pflowif yet, so don't clear it.

>               return (ENOMEM);
>       }
> -     pflowif->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
> +
> +     pflowif->so = so;
> +
> +     MGET(pflowif->send_nam, M_WAIT, MT_SONAME);
> +     sin = mtod(pflowif->send_nam, struct sockaddr_in *);

memset(sin, 0 , sizeof(*sin));

> +     sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
> +     sin->sin_family = AF_INET;
> +     sin->sin_addr.s_addr = INADDR_ANY;
> +     sin->sin_port = 0;
> +
>       pflowif->sc_receiver_ip.s_addr = INADDR_ANY;
>       pflowif->sc_receiver_port = 0;
>       pflowif->sc_sender_ip.s_addr = INADDR_ANY;
> -     pflowif->sc_sender_port = pflow_get_dynport();
>       pflowif->sc_version = PFLOW_PROTO_DEFAULT;
>  
>       /* ipfix template init */
> @@ -244,10 +276,6 @@ pflow_clone_create(struct if_clone *ifc, int unit)
>       if_attach(ifp);
>       if_alloc_sadl(ifp);
>  
> -#if NBPFILTER > 0
> -     bpfattach(&pflowif->sc_if.if_bpf, ifp, DLT_RAW, 0);
> -#endif
> -
>       /* Insert into list of pflows */
>       SLIST_INSERT_HEAD(&pflowif_list, pflowif, sc_next);
>       return (0);
> @@ -257,9 +285,15 @@ int
>  pflow_clone_destroy(struct ifnet *ifp)
>  {
>       struct pflow_softc      *sc = ifp->if_softc;
> -     int                      s;
> +     int                      s, error;
> +
> +     error = 0;
> +     if (sc->so != NULL)
> +             error = soclose(sc->so);
>  
>       s = splnet();
> +     sc->so = NULL;

Strange that you close sc->so outside splnet() but set it to NULL
within.  Either splnet() is not needed or you have a use after free.
If pflow can be run at splnet() you have to protect soclose().

> +     m_freem(sc->send_nam);
>       if (timeout_initialized(&sc->sc_tmo))
>               timeout_del(&sc->sc_tmo);
>       if (timeout_initialized(&sc->sc_tmo6))
> @@ -269,10 +303,9 @@ pflow_clone_destroy(struct ifnet *ifp)
>       pflow_flush(sc);
>       if_detach(ifp);
>       SLIST_REMOVE(&pflowif_list, sc, pflow_softc, sc_next);
> -     free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
>       free(sc, M_DEVBUF, 0);
>       splx(s);
> -     return (0);
> +     return (error);
>  }
>  
>  /*
> @@ -312,6 +345,9 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
>       struct pflow_softc      *sc = ifp->if_softc;
>       struct ifreq            *ifr = (struct ifreq *)data;
>       struct pflowreq          pflowr;
> +     struct socket           *so;
> +     struct sockaddr_in      *sin;
> +     struct mbuf             *m;
>       int                      s, error;
>  
>       switch (cmd) {
> @@ -321,8 +357,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
>       case SIOCSIFFLAGS:
>               if ((ifp->if_flags & IFF_UP) &&
>                   sc->sc_receiver_ip.s_addr != INADDR_ANY &&
> -                 sc->sc_receiver_port != 0 &&
> -                 sc->sc_sender_port != 0) {
> +                 sc->sc_receiver_port != 0 && sc->so != NULL) {
>                       ifp->if_flags |= IFF_RUNNING;
>                       sc->sc_gcounter=pflowstats.pflow_flows;
>                       /* send templates on startup */
> @@ -374,16 +409,49 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
>                               return(EINVAL);
>                       }
>               }
> -             s = splnet();
>  
> +             s = splnet();
>               pflow_flush(sc);
> +             splx(s);

This splx() looks strange, too.  Why flush something that could be
filled by an interrupt after splx()?  I think you have to put
everything in one splnet() block.

I wonder wether pflow(4) is ever run at splnet().  Otherwise
splsoftnet() for the block should be enough.  But that is another
topic.  Keep the splnet() for now.

>  
> -             if (pflowr.addrmask & PFLOW_MASK_DSTIP)
> +             if (pflowr.addrmask & PFLOW_MASK_DSTIP) {
> +                     s = splnet();
>                       sc->sc_receiver_ip.s_addr = pflowr.receiver_ip.s_addr;
> -             if (pflowr.addrmask & PFLOW_MASK_DSTPRT)
> +                     sin = mtod(sc->send_nam, struct sockaddr_in *);
> +                     sin->sin_addr.s_addr = sc->sc_receiver_ip.s_addr;
> +                     splx(s);
> +             }
> +             if (pflowr.addrmask & PFLOW_MASK_DSTPRT) {
> +                     s = splnet();
>                       sc->sc_receiver_port = pflowr.receiver_port;
> -             if (pflowr.addrmask & PFLOW_MASK_SRCIP)
> +                     sin = mtod(sc->send_nam, struct sockaddr_in *);
> +                     sin->sin_port = pflowr.receiver_port;
> +                     splx(s);
> +             }
> +             if (pflowr.addrmask & PFLOW_MASK_SRCIP) {
> +                     error = socreate(AF_INET, &so, SOCK_DGRAM, 0);
> +                     if (error)
> +                             return (error);
> +                     
> +                     MGET(m, M_WAIT, MT_SONAME);
> +                     sin = mtod(m, struct sockaddr_in *);

memset(sin, 0 , sizeof(*sin));

> +                     sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
> +                     sin->sin_family = AF_INET;
> +                     sin->sin_addr.s_addr = pflowr.sender_ip.s_addr;
> +                     sin->sin_port = 0;
> +
> +                     error = sobind(so, m, &proc0);
> +                     m_freem(m);
> +                     if (error)

You leak the so here.

> +                             return (error);
> +
> +                     s = splnet();
>                       sc->sc_sender_ip.s_addr = pflowr.sender_ip.s_addr;
> +                     sc->so = so;

Are you sure that sc->so is NULL?  Do we leak the old socket here?

> +                     splx(s);
> +             }
> +
> +             s = splnet();
>               /* error check is above */
>               if (pflowr.addrmask & PFLOW_MASK_VERSION)
>                       sc->sc_version = pflowr.version;
> @@ -395,8 +463,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
>  
>               if ((ifp->if_flags & IFF_UP) &&
>                   sc->sc_receiver_ip.s_addr != INADDR_ANY &&
> -                 sc->sc_receiver_port != 0 &&
> -                 sc->sc_sender_port != 0) {
> +                 sc->sc_receiver_port != 0 && sc->so != NULL) {
>                       ifp->if_flags |= IFF_RUNNING;
>                       sc->sc_gcounter=pflowstats.pflow_flows;
>                       if (sc->sc_version == PFLOW_PROTO_10) {
> @@ -1083,78 +1150,12 @@ pflow_sendout_ipfix_tmpl(struct pflow_softc *sc)
>  int
>  pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
>  {
> -     struct udpiphdr *ui;
> -     u_int16_t        len = m->m_pkthdr.len;
> -#if NBPFILTER > 0
> -     struct ifnet    *ifp = &sc->sc_if;
> -#endif
> -     struct ip       *ip;
> -     int              err;
> -
> -     /* UDP Header*/
> -     M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
> -     if (m == NULL) {
> -             pflowstats.pflow_onomem++;
> -             return (ENOBUFS);
> -     }
> -
> -     ui = mtod(m, struct udpiphdr *);
> -     ui->ui_pr = IPPROTO_UDP;
> -     ui->ui_src = sc->sc_sender_ip;
> -     ui->ui_sport = sc->sc_sender_port;
> -     ui->ui_dst = sc->sc_receiver_ip;
> -     ui->ui_dport = sc->sc_receiver_port;
> -     ui->ui_ulen = htons(sizeof(struct udphdr) + len);
> -     ui->ui_sum = 0;
> -     m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
> -     m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
> -
> -     ip = (struct ip *)ui;
> -     ip->ip_v = IPVERSION;
> -     ip->ip_hl = sizeof(struct ip) >> 2;
> -     ip->ip_id = htons(ip_randomid());
> -     ip->ip_off = htons(IP_DF);
> -     ip->ip_tos = IPTOS_LOWDELAY;
> -     ip->ip_ttl = IPDEFTTL;
> -     ip->ip_len = htons(sizeof(struct udpiphdr) + len);
> -
> -#if NBPFILTER > 0
> -     if (ifp->if_bpf)
> -             bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> -#endif
> -
>       sc->sc_if.if_opackets++;
>       sc->sc_if.if_obytes += m->m_pkthdr.len;
>  
> -     if ((err = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL,
> -         0))) {
> -             pflowstats.pflow_oerrors++;
> -             sc->sc_if.if_oerrors++;
> -     }
> -     return (err);
> -}
> -
> -int
> -pflow_get_dynport(void)
> -{
> -     u_int16_t       tmp, low, high, cut;
> -
> -     low = ipport_hifirstauto;     /* sysctl */
> -     high = ipport_hilastauto;
> -
> -     cut = arc4random_uniform(1 + high - low) + low;
> -
> -     for (tmp = cut; tmp <= high; ++(tmp)) {
> -             if (!in_baddynamic(tmp, IPPROTO_UDP))
> -                     return (htons(tmp));
> -     }
> -
> -     for (tmp = cut - 1; tmp >= low; --(tmp)) {
> -             if (!in_baddynamic(tmp, IPPROTO_UDP))
> -                     return (htons(tmp));
> -     }
> -
> -     return (htons(ipport_hilastauto)); /* XXX */
> +     if (sc->so != NULL)
> +             return (sosend(sc->so, sc->send_nam, NULL, m, NULL, 0));
> +     return (EINVAL);
>  }
>  
>  int
> diff --git if_pflow.h if_pflow.h
> index 7e35ddd..0929cdb 100644
> --- if_pflow.h
> +++ if_pflow.h
> @@ -181,12 +181,12 @@ struct pflow_softc {
>       unsigned int             sc_maxcount6;
>       u_int64_t                sc_gcounter;
>       u_int32_t                sc_sequence;
> -     struct ip_moptions       sc_imo;
>       struct timeout           sc_tmo;
>       struct timeout           sc_tmo6;
>       struct timeout           sc_tmo_tmpl;
> +     struct socket           *so;
> +     struct mbuf             *send_nam;
>       struct in_addr           sc_sender_ip;
> -     u_int16_t                sc_sender_port;
>       struct in_addr           sc_receiver_ip;
>       u_int16_t                sc_receiver_port;
>       u_char                   sc_send_templates;
> 
> 
> -- 
> I'm not entirely sure you are real.

Reply via email to