Hello,

</snip>

> > @@ -790,6 +795,7 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int 
> > *phlen)
> >         memcpy(ip + 1, p->ipopt_list, optlen);
> >         *phlen = sizeof(struct ip) + optlen;
> >         ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
> > +       ip->ip_hl += (optlen >> 2);
> >         return (m);
> >  }
> 
> I think ip_insertoptions() is not intended to insert additional
> options, but to set them.  The *phlen is set to htons(ntohs(ip->ip_len)
> + optlen), so the option length is optlen.  The ip->ip_hl += makes
> no sense.  I am not sure if ip->ip_hl is always initialized.
> 
> ip_insertoptions() returns the option length in &hlen.  I think it
> is the callers job to write it into the header.
> 
> Instead of touching ip_insertoptions() I would set ip->ip_hl = hlen
> >> 2 in this block, like it is done in ip_output().
> 
> > +       if (opts != NULL) {
> >                 m = ip_insertoptions(m, opts, &hlen);
> > +               ip->ip_v = IPVERSION;
> > +               ip->ip_off &= htons(IP_DF);
> > +               ip->ip_id = htons(ip_randomid());
> > +               ipstat_inc(ips_localout);
> > +               ip_send_raw(m);
> > +       } else

    yes, you are right. below is updated diff I would like to commit.
> 
> Appart from that, adding a special task seems the way to go.

    I think so too. Alternative way would be to pass send flags via m_pkthdr
    in mbuf, however there is no space. We would have to add a new member
    to m_pkthdr. I see such change as too intrusive (given we address a true
    corner case).

thanks and
regards
sashan


--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index a007aa6c2b3..eb03bbee0b3 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -846,10 +846,20 @@ icmp_send(struct mbuf *m, struct mbuf *opts)
                printf("icmp_send dst %s src %s\n", dst, src);
        }
 #endif
-       if (opts != NULL)
+       /*
+        * ip_send() cannot handle IP options properly. So in case we have
+        * options fill out the IP header here and use ip_send_raw() instead.
+        */
+       if (opts != NULL) {
                m = ip_insertoptions(m, opts, &hlen);
-
-       ip_send(m);
+               ip->ip_hl = (hlen >> 2);
+               ip->ip_v = IPVERSION;
+               ip->ip_off &= htons(IP_DF);
+               ip->ip_id = htons(ip_randomid());
+               ipstat_inc(ips_localout);
+               ip_send_raw(m);
+       } else
+               ip_send(m);
 }
 
 u_int32_t
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 0ec3f723be4..6c935cc88c2 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -139,6 +139,7 @@ struct cpumem *ipcounters;
 int ip_sysctl_ipstat(void *, size_t *, void *);
 
 static struct mbuf_queue       ipsend_mq;
+static struct mbuf_queue       ipsendraw_mq;
 
 extern struct niqueue          arpinq;
 
@@ -147,7 +148,11 @@ int        ip_dooptions(struct mbuf *, struct ifnet *);
 int    in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
 
 static void ip_send_dispatch(void *);
+static void ip_sendraw_dispatch(void *);
 static struct task ipsend_task = TASK_INITIALIZER(ip_send_dispatch, 
&ipsend_mq);
+static struct task ipsendraw_task =
+       TASK_INITIALIZER(ip_sendraw_dispatch, &ipsendraw_mq);
+
 /*
  * Used to save the IP options in case a protocol wants to respond
  * to an incoming packet over the same route if the packet got here
@@ -217,6 +222,7 @@ ip_init(void)
                DP_SET(rootonlyports.udp, defrootonlyports_udp[i]);
 
        mq_init(&ipsend_mq, 64, IPL_SOFTNET);
+       mq_init(&ipsendraw_mq, 64, IPL_SOFTNET);
 
 #ifdef IPSEC
        ipsec_init();
@@ -1777,7 +1783,7 @@ ip_savecontrol(struct inpcb *inp, struct mbuf **mp, 
struct ip *ip,
 }
 
 void
-ip_send_dispatch(void *xmq)
+ip_send_do_dispatch(void *xmq, int flags)
 {
        struct mbuf_queue *mq = xmq;
        struct mbuf *m;
@@ -1789,14 +1795,33 @@ ip_send_dispatch(void *xmq)
 
        NET_LOCK();
        while ((m = ml_dequeue(&ml)) != NULL) {
-               ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
+               ip_output(m, NULL, NULL, flags, NULL, NULL, 0);
        }
        NET_UNLOCK();
 }
 
+void
+ip_sendraw_dispatch(void *xmq)
+{
+       ip_send_do_dispatch(xmq, IP_RAWOUTPUT);
+}
+
+void
+ip_send_dispatch(void *xmq)
+{
+       ip_send_do_dispatch(xmq, 0);
+}
+
 void
 ip_send(struct mbuf *m)
 {
        mq_enqueue(&ipsend_mq, m);
        task_add(net_tq(0), &ipsend_task);
 }
+
+void
+ip_send_raw(struct mbuf *m)
+{
+       mq_enqueue(&ipsendraw_mq, m);
+       task_add(net_tq(0), &ipsendraw_task);
+}
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index c01a3e7803c..e19b744fdf3 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -765,6 +765,13 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int 
*phlen)
        optlen = opt->m_len - sizeof(p->ipopt_dst);
        if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
                return (m);             /* XXX should fail */
+
+       /* check if options will fit to IP header */
+       if ((optlen + (ip->ip_hl << 2)) > (0x0f << 2)) {
+               *phlen = sizeof (struct ip);
+               return (m);
+       }
+
        if (p->ipopt_dst.s_addr)
                ip->ip_dst = p->ipopt_dst;
        if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index 7ede24ce922..1a43675a7ac 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -240,6 +240,7 @@ struct mbuf *
 u_int16_t
         ip_randomid(void);
 void    ip_send(struct mbuf *);
+void    ip_send_raw(struct mbuf *);
 void    ip_slowtimo(void);
 struct mbuf *
         ip_srcroute(struct mbuf *);

Reply via email to