On Mon, Jul 20, 2015 at 01:09:07AM +0000, Florian Obser wrote:
> so pflow(4) shoving it's data with ip_output into the network stack
> seems wrong. this converts it to use sosend(9) and might even give us
> non-legacy IP support.
I think this is a good idea. Comments inline.
bluhm
> tests from (heavy) pflow(4) users would be appriciated.
Sorry, I don't use pflow(4).
> diff --git if_pflow.c if_pflow.c
> index 4f3ac5e..624fdaf 100644
> --- if_pflow.c
> +++ if_pflow.c
> @@ -28,6 +28,8 @@
> #include <sys/timeout.h>
> #include <sys/ioctl.h>
> #include <sys/kernel.h>
> +#include <sys/socket.h>
> +#include <sys/socketvar.h>
> #include <sys/sysctl.h>
>
> #include <net/if.h>
> @@ -94,7 +96,6 @@ int pflow_pack_flow(struct pf_state *, struct pf_state_key
> *,
> struct pflow_softc *);
> int pflow_pack_flow_ipfix(struct pf_state *, struct pf_state_key *,
> struct pflow_softc *);
> -int pflow_get_dynport(void);
> int export_pflow_if(struct pf_state*, struct pf_state_key *,
> struct pflow_softc *);
> int copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc);
> @@ -107,6 +108,8 @@ struct if_clone pflow_cloner =
> IF_CLONE_INITIALIZER("pflow", pflow_clone_create,
> pflow_clone_destroy);
>
> +extern struct proc proc0;
> +
> void
> pflowattach(int npflow)
> {
> @@ -119,22 +122,51 @@ pflow_clone_create(struct if_clone *ifc, int unit)
> {
> struct ifnet *ifp;
> struct pflow_softc *pflowif;
> + struct socket *so;
> + struct sockaddr_in *sin;
> + struct mbuf *m;
> + int error;
> +
> + error = 0;
> + m = NULL;
Useless code, they are set a few lines below.
> +
> + error = socreate(AF_INET, &so, SOCK_DGRAM, 0);
> + if (error)
> + return (error);
> +
> + MGET(m, M_WAIT, MT_SONAME);
> + sin = mtod(m, struct sockaddr_in *);
You have to initialialize struct sockaddr_in to 0 to clear the padding.
> + sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
> + sin->sin_family = AF_INET;
> + sin->sin_addr.s_addr = INADDR_ANY;
> + sin->sin_port = htons(0);
> + error = sobind(so, m, &proc0);
> + m_freem(m);
> + if (error) {
> + soclose(so);
> + pflowif->so = NULL;
You have not set pflowif yet, so don't clear it.
> + return (error);
> + }
>
> if ((pflowif = malloc(sizeof(*pflowif),
> - M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
> - return (ENOMEM);
> -
> - if ((pflowif->sc_imo.imo_membership = malloc(
> - (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
> - M_WAITOK|M_ZERO)) == NULL) {
> - free(pflowif, M_DEVBUF, 0);
> + M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) {
> + soclose(so);
> + pflowif->so = NULL;
You have not set pflowif yet, so don't clear it.
> return (ENOMEM);
> }
> - pflowif->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
> +
> + pflowif->so = so;
> +
> + MGET(pflowif->send_nam, M_WAIT, MT_SONAME);
> + sin = mtod(pflowif->send_nam, struct sockaddr_in *);
memset(sin, 0 , sizeof(*sin));
> + sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
> + sin->sin_family = AF_INET;
> + sin->sin_addr.s_addr = INADDR_ANY;
> + sin->sin_port = 0;
> +
> pflowif->sc_receiver_ip.s_addr = INADDR_ANY;
> pflowif->sc_receiver_port = 0;
> pflowif->sc_sender_ip.s_addr = INADDR_ANY;
> - pflowif->sc_sender_port = pflow_get_dynport();
> pflowif->sc_version = PFLOW_PROTO_DEFAULT;
>
> /* ipfix template init */
> @@ -244,10 +276,6 @@ pflow_clone_create(struct if_clone *ifc, int unit)
> if_attach(ifp);
> if_alloc_sadl(ifp);
>
> -#if NBPFILTER > 0
> - bpfattach(&pflowif->sc_if.if_bpf, ifp, DLT_RAW, 0);
> -#endif
> -
> /* Insert into list of pflows */
> SLIST_INSERT_HEAD(&pflowif_list, pflowif, sc_next);
> return (0);
> @@ -257,9 +285,15 @@ int
> pflow_clone_destroy(struct ifnet *ifp)
> {
> struct pflow_softc *sc = ifp->if_softc;
> - int s;
> + int s, error;
> +
> + error = 0;
> + if (sc->so != NULL)
> + error = soclose(sc->so);
>
> s = splnet();
> + sc->so = NULL;
Strange that you close sc->so outside splnet() but set it to NULL
within. Either splnet() is not needed or you have a use after free.
If pflow can be run at splnet() you have to protect soclose().
> + m_freem(sc->send_nam);
> if (timeout_initialized(&sc->sc_tmo))
> timeout_del(&sc->sc_tmo);
> if (timeout_initialized(&sc->sc_tmo6))
> @@ -269,10 +303,9 @@ pflow_clone_destroy(struct ifnet *ifp)
> pflow_flush(sc);
> if_detach(ifp);
> SLIST_REMOVE(&pflowif_list, sc, pflow_softc, sc_next);
> - free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
> free(sc, M_DEVBUF, 0);
> splx(s);
> - return (0);
> + return (error);
> }
>
> /*
> @@ -312,6 +345,9 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
> struct pflow_softc *sc = ifp->if_softc;
> struct ifreq *ifr = (struct ifreq *)data;
> struct pflowreq pflowr;
> + struct socket *so;
> + struct sockaddr_in *sin;
> + struct mbuf *m;
> int s, error;
>
> switch (cmd) {
> @@ -321,8 +357,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
> case SIOCSIFFLAGS:
> if ((ifp->if_flags & IFF_UP) &&
> sc->sc_receiver_ip.s_addr != INADDR_ANY &&
> - sc->sc_receiver_port != 0 &&
> - sc->sc_sender_port != 0) {
> + sc->sc_receiver_port != 0 && sc->so != NULL) {
> ifp->if_flags |= IFF_RUNNING;
> sc->sc_gcounter=pflowstats.pflow_flows;
> /* send templates on startup */
> @@ -374,16 +409,49 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
> return(EINVAL);
> }
> }
> - s = splnet();
>
> + s = splnet();
> pflow_flush(sc);
> + splx(s);
This splx() looks strange, too. Why flush something that could be
filled by an interrupt after splx()? I think you have to put
everything in one splnet() block.
I wonder wether pflow(4) is ever run at splnet(). Otherwise
splsoftnet() for the block should be enough. But that is another
topic. Keep the splnet() for now.
>
> - if (pflowr.addrmask & PFLOW_MASK_DSTIP)
> + if (pflowr.addrmask & PFLOW_MASK_DSTIP) {
> + s = splnet();
> sc->sc_receiver_ip.s_addr = pflowr.receiver_ip.s_addr;
> - if (pflowr.addrmask & PFLOW_MASK_DSTPRT)
> + sin = mtod(sc->send_nam, struct sockaddr_in *);
> + sin->sin_addr.s_addr = sc->sc_receiver_ip.s_addr;
> + splx(s);
> + }
> + if (pflowr.addrmask & PFLOW_MASK_DSTPRT) {
> + s = splnet();
> sc->sc_receiver_port = pflowr.receiver_port;
> - if (pflowr.addrmask & PFLOW_MASK_SRCIP)
> + sin = mtod(sc->send_nam, struct sockaddr_in *);
> + sin->sin_port = pflowr.receiver_port;
> + splx(s);
> + }
> + if (pflowr.addrmask & PFLOW_MASK_SRCIP) {
> + error = socreate(AF_INET, &so, SOCK_DGRAM, 0);
> + if (error)
> + return (error);
> +
> + MGET(m, M_WAIT, MT_SONAME);
> + sin = mtod(m, struct sockaddr_in *);
memset(sin, 0 , sizeof(*sin));
> + sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
> + sin->sin_family = AF_INET;
> + sin->sin_addr.s_addr = pflowr.sender_ip.s_addr;
> + sin->sin_port = 0;
> +
> + error = sobind(so, m, &proc0);
> + m_freem(m);
> + if (error)
You leak the so here.
> + return (error);
> +
> + s = splnet();
> sc->sc_sender_ip.s_addr = pflowr.sender_ip.s_addr;
> + sc->so = so;
Are you sure that sc->so is NULL? Do we leak the old socket here?
> + splx(s);
> + }
> +
> + s = splnet();
> /* error check is above */
> if (pflowr.addrmask & PFLOW_MASK_VERSION)
> sc->sc_version = pflowr.version;
> @@ -395,8 +463,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
>
> if ((ifp->if_flags & IFF_UP) &&
> sc->sc_receiver_ip.s_addr != INADDR_ANY &&
> - sc->sc_receiver_port != 0 &&
> - sc->sc_sender_port != 0) {
> + sc->sc_receiver_port != 0 && sc->so != NULL) {
> ifp->if_flags |= IFF_RUNNING;
> sc->sc_gcounter=pflowstats.pflow_flows;
> if (sc->sc_version == PFLOW_PROTO_10) {
> @@ -1083,78 +1150,12 @@ pflow_sendout_ipfix_tmpl(struct pflow_softc *sc)
> int
> pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
> {
> - struct udpiphdr *ui;
> - u_int16_t len = m->m_pkthdr.len;
> -#if NBPFILTER > 0
> - struct ifnet *ifp = &sc->sc_if;
> -#endif
> - struct ip *ip;
> - int err;
> -
> - /* UDP Header*/
> - M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
> - if (m == NULL) {
> - pflowstats.pflow_onomem++;
> - return (ENOBUFS);
> - }
> -
> - ui = mtod(m, struct udpiphdr *);
> - ui->ui_pr = IPPROTO_UDP;
> - ui->ui_src = sc->sc_sender_ip;
> - ui->ui_sport = sc->sc_sender_port;
> - ui->ui_dst = sc->sc_receiver_ip;
> - ui->ui_dport = sc->sc_receiver_port;
> - ui->ui_ulen = htons(sizeof(struct udphdr) + len);
> - ui->ui_sum = 0;
> - m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
> - m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
> -
> - ip = (struct ip *)ui;
> - ip->ip_v = IPVERSION;
> - ip->ip_hl = sizeof(struct ip) >> 2;
> - ip->ip_id = htons(ip_randomid());
> - ip->ip_off = htons(IP_DF);
> - ip->ip_tos = IPTOS_LOWDELAY;
> - ip->ip_ttl = IPDEFTTL;
> - ip->ip_len = htons(sizeof(struct udpiphdr) + len);
> -
> -#if NBPFILTER > 0
> - if (ifp->if_bpf)
> - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> -#endif
> -
> sc->sc_if.if_opackets++;
> sc->sc_if.if_obytes += m->m_pkthdr.len;
>
> - if ((err = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL,
> - 0))) {
> - pflowstats.pflow_oerrors++;
> - sc->sc_if.if_oerrors++;
> - }
> - return (err);
> -}
> -
> -int
> -pflow_get_dynport(void)
> -{
> - u_int16_t tmp, low, high, cut;
> -
> - low = ipport_hifirstauto; /* sysctl */
> - high = ipport_hilastauto;
> -
> - cut = arc4random_uniform(1 + high - low) + low;
> -
> - for (tmp = cut; tmp <= high; ++(tmp)) {
> - if (!in_baddynamic(tmp, IPPROTO_UDP))
> - return (htons(tmp));
> - }
> -
> - for (tmp = cut - 1; tmp >= low; --(tmp)) {
> - if (!in_baddynamic(tmp, IPPROTO_UDP))
> - return (htons(tmp));
> - }
> -
> - return (htons(ipport_hilastauto)); /* XXX */
> + if (sc->so != NULL)
> + return (sosend(sc->so, sc->send_nam, NULL, m, NULL, 0));
> + return (EINVAL);
> }
>
> int
> diff --git if_pflow.h if_pflow.h
> index 7e35ddd..0929cdb 100644
> --- if_pflow.h
> +++ if_pflow.h
> @@ -181,12 +181,12 @@ struct pflow_softc {
> unsigned int sc_maxcount6;
> u_int64_t sc_gcounter;
> u_int32_t sc_sequence;
> - struct ip_moptions sc_imo;
> struct timeout sc_tmo;
> struct timeout sc_tmo6;
> struct timeout sc_tmo_tmpl;
> + struct socket *so;
> + struct mbuf *send_nam;
> struct in_addr sc_sender_ip;
> - u_int16_t sc_sender_port;
> struct in_addr sc_receiver_ip;
> u_int16_t sc_receiver_port;
> u_char sc_send_templates;
>
>
> --
> I'm not entirely sure you are real.