On Wed, Oct 19, 2011 at 06:09:30PM -0700, Jesse Gross wrote:
> >     switch (wc->tos_frag_mask & FLOW_FRAG_MASK) {
> > -    case FLOW_FRAG_ANY | FLOW_FRAG_FIRST:
> > +    case FLOW_FRAG_ANY | FLOW_FRAG_LATER:
> >         ds_put_format(s, "frag=%s,",
> >                       f->tos_frag & FLOW_FRAG_ANY
> > -                      ? (f->tos_frag & FLOW_FRAG_FIRST ? "first" : "later")
> > -                      : (f->tos_frag & FLOW_FRAG_FIRST ? "<error>" : 
> > "no"));
> > +                      ? (f->tos_frag & FLOW_FRAG_LATER ? "later" : "first")
> > +                      : (f->tos_frag & FLOW_FRAG_LATER ? "no" : 
> > "<error>"));
>
> I think the last condition is reversed because !FLOW_FRAG_ANY and
> FLOW_FRAG_LATER is still an error.

Oops.  Applied:

diff --git a/lib/classifier.c b/lib/classifier.c
index 0335f13..869029f 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -618,7 +618,7 @@ cls_rule_format(const struct cls_rule *rule, struct ds *s)
         ds_put_format(s, "frag=%s,",
                       f->tos_frag & FLOW_FRAG_ANY
                       ? (f->tos_frag & FLOW_FRAG_LATER ? "later" : "first")
-                      : (f->tos_frag & FLOW_FRAG_LATER ? "no" : "<error>"));
+                      : (f->tos_frag & FLOW_FRAG_LATER ? "<error>" : "no"));
         break;

     case FLOW_FRAG_ANY:

> > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> > index 58fc18e..536a213 100644
> > --- a/ofproto/ofproto-dpif.c
> > +++ b/ofproto/ofproto-dpif.c
> > @@ -2954,7 +2954,7 @@ rule_dpif_lookup(struct ofproto_dpif *ofproto, const 
> > struct flow *flow,
> >     }
> >
> >     cls = &ofproto->up.tables[table_id];
> > -    if (flow->tos_frag & FLOW_FRAG_FIRST
> > +    if ((flow->tos_frag & FLOW_FRAG_MASK) == FLOW_FRAG_ANY
>
> Is there a reason for this to not be (flow->tos_frag & FLOW_FRAG_ANY)?
>  I realize that it does the masking out for last frags as well, which
> is not strictly necessary although I'm not sure that it matters all
> that much.

Over-optimization: I wanted to avoid the "struct flow" copy in that
case.

I changed it:

--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -2954,7 +2954,7 @@ rule_dpif_lookup(struct ofproto_dpif *ofproto, const stru\
ct flow *flow,
     }

     cls = &ofproto->up.tables[table_id];
-    if ((flow->tos_frag & FLOW_FRAG_MASK) == FLOW_FRAG_ANY
+    if (flow->tos_frag & FLOW_FRAG_ANY
         && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
         /* For OFPC_NORMAL frag_handling, we must pretend that transport ports
          * are unavailable. */

> Otherwise, the incremental looks good.  However, I realized that there
> is one more issue: when we pass up the flow key for UDP GSO packets,
> they will all reflect the first fragment and not the later packets.

Hmm, good point.

Here's a build-tested-only try at this.  It needs testing and comments,
at least.  Is it on the right track?

(I've appended two copies.  The first one is a full diff, the second one
uses "git diff -b" to omit changes that reindented lines.)

--8<--------------------------cut here-------------------------->8--

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 2f8027c..2815bcc 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -84,8 +84,10 @@ EXPORT_SYMBOL(dp_ioctl_hook);
 static LIST_HEAD(dps);
 
 static struct vport *new_vport(const struct vport_parms *);
-static int queue_userspace_packets(struct datapath *, struct sk_buff *,
-                                const struct dp_upcall_info *);
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
+                            const struct dp_upcall_info *);
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
+                                 const struct dp_upcall_info *);
 
 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
 struct datapath *get_dp(int dp_ifindex)
@@ -353,8 +355,8 @@ static struct genl_family dp_packet_genl_family = {
 int dp_upcall(struct datapath *dp, struct sk_buff *skb,
              const struct dp_upcall_info *upcall_info)
 {
-       struct sk_buff *segs = NULL;
        struct dp_stats_percpu *stats;
+       int dp_ifindex;
        int err;
 
        if (upcall_info->pid == 0) {
@@ -362,30 +364,18 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb,
                goto err;
        }
 
-       forward_ip_summed(skb, true);
-
-       /* Break apart GSO packets into their component pieces.  Otherwise
-        * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
-       if (skb_is_gso(skb)) {
-               segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
-               
-               if (IS_ERR(segs)) {
-                       err = PTR_ERR(segs);
-                       goto err;
-               }
-               skb = segs;
+       dp_ifindex = get_dpifindex(dp);
+       if (!dp_ifindex) {
+               err = -ENODEV;
+               goto err;
        }
 
-       err = queue_userspace_packets(dp, skb, upcall_info);
-       if (segs) {
-               struct sk_buff *next;
-               /* Free GSO-segments */
-               do {
-                       next = segs->next;
-                       kfree_skb(segs);
-               } while ((segs = next) != NULL);
-       }
+       forward_ip_summed(skb, true);
 
+       if (!skb_is_gso(skb))
+               err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+       else
+               err = queue_gso_packets(dp_ifindex, skb, upcall_info);
        if (err)
                goto err;
 
@@ -401,68 +391,93 @@ err:
        return err;
 }
 
-/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
- * 'upcall_info'.  There will be only one packet unless we broke up a GSO
- * packet.
- */
-static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
-                                  const struct dp_upcall_info *upcall_info)
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
+                            const struct dp_upcall_info *upcall_info)
 {
-       int dp_ifindex;
+       struct dp_upcall_info later_info;
+       struct sw_flow_key later_key;
+       struct sk_buff *segs, *nskb;
+       int err;
 
-       dp_ifindex = get_dpifindex(dp);
-       if (!dp_ifindex)
-               return -ENODEV;
+       segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+       if (IS_ERR(segs))
+               return PTR_ERR(segs);
 
+       /* Queue all of the segments. */
+       skb = segs;
        do {
-               struct ovs_header *upcall;
-               struct sk_buff *user_skb; /* to be queued to userspace */
-               struct nlattr *nla;
-               unsigned int len;
-               int err;
-
-               err = vlan_deaccel_tag(skb);
-               if (unlikely(err))
-                       return err;
-
-               if (nla_attr_size(skb->len) > USHRT_MAX)
-                       return -EFBIG;
-
-               len = sizeof(struct ovs_header);
-               len += nla_total_size(skb->len);
-               len += nla_total_size(FLOW_BUFSIZE);
-               if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
-                       len += nla_total_size(8);
-
-               user_skb = genlmsg_new(len, GFP_ATOMIC);
-               if (!user_skb)
-                       return -ENOMEM;
-
-               upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
-                                        0, upcall_info->cmd);
-               upcall->dp_ifindex = dp_ifindex;
-
-               nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
-               flow_to_nlattrs(upcall_info->key, user_skb);
-               nla_nest_end(user_skb, nla);
-
-               if (upcall_info->userdata)
-                       nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
-                                   nla_get_u64(upcall_info->userdata));
-
-               nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
-               if (skb->ip_summed == CHECKSUM_PARTIAL)
-                       copy_and_csum_skb(skb, nla_data(nla));
-               else
-                       skb_copy_bits(skb, 0, nla_data(nla), skb->len);
-
-               err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
+               err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
                if (err)
-                       return err;
+                       break;
+
+               if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
+                       later_key = *upcall_info->key;
+                       later_key.ip.tos_frag &= ~OVS_FRAG_TYPE_MASK;
+                       later_key.ip.tos_frag |= OVS_FRAG_TYPE_LATER;
 
+                       later_info = *upcall_info;
+                       later_info.key = &later_key;
+                       upcall_info = &later_info;
+               }
        } while ((skb = skb->next));
 
-       return 0;
+       /* Free all of the segments. */
+       skb = segs;
+       do {
+               nskb = skb->next;
+               if (err)
+                       kfree_skb(skb);
+               else
+                       consume_skb(skb);
+       } while ((skb = nskb));
+       return err;
+}
+
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
+                                 const struct dp_upcall_info *upcall_info)
+{
+       struct ovs_header *upcall;
+       struct sk_buff *user_skb; /* to be queued to userspace */
+       struct nlattr *nla;
+       unsigned int len;
+       int err;
+
+       err = vlan_deaccel_tag(skb);
+       if (unlikely(err))
+               return err;
+
+       if (nla_attr_size(skb->len) > USHRT_MAX)
+               return -EFBIG;
+
+       len = sizeof(struct ovs_header);
+       len += nla_total_size(skb->len);
+       len += nla_total_size(FLOW_BUFSIZE);
+       if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
+               len += nla_total_size(8);
+
+       user_skb = genlmsg_new(len, GFP_ATOMIC);
+       if (!user_skb)
+               return -ENOMEM;
+
+       upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
+                            0, upcall_info->cmd);
+       upcall->dp_ifindex = dp_ifindex;
+
+       nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
+       flow_to_nlattrs(upcall_info->key, user_skb);
+       nla_nest_end(user_skb, nla);
+
+       if (upcall_info->userdata)
+               nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
+                           nla_get_u64(upcall_info->userdata));
+
+       nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
+       if (skb->ip_summed == CHECKSUM_PARTIAL)
+               copy_and_csum_skb(skb, nla_data(nla));
+       else
+               skb_copy_bits(skb, 0, nla_data(nla), skb->len);
+
+       return genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
 }
 
 /* Called with genl_mutex. */

--8<--------------------------cut here-------------------------->8--

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 2f8027c..2815bcc 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -84,7 +84,9 @@ EXPORT_SYMBOL(dp_ioctl_hook);
 static LIST_HEAD(dps);
 
 static struct vport *new_vport(const struct vport_parms *);
-static int queue_userspace_packets(struct datapath *, struct sk_buff *,
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
+                            const struct dp_upcall_info *);
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
                                 const struct dp_upcall_info *);
 
 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
@@ -353,8 +355,8 @@ static struct genl_family dp_packet_genl_family = {
 int dp_upcall(struct datapath *dp, struct sk_buff *skb,
              const struct dp_upcall_info *upcall_info)
 {
-       struct sk_buff *segs = NULL;
        struct dp_stats_percpu *stats;
+       int dp_ifindex;
        int err;
 
        if (upcall_info->pid == 0) {
@@ -362,30 +364,18 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb,
                goto err;
        }
 
-       forward_ip_summed(skb, true);
-
-       /* Break apart GSO packets into their component pieces.  Otherwise
-        * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
-       if (skb_is_gso(skb)) {
-               segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
-               
-               if (IS_ERR(segs)) {
-                       err = PTR_ERR(segs);
+       dp_ifindex = get_dpifindex(dp);
+       if (!dp_ifindex) {
+               err = -ENODEV;
                        goto err;
                }
-               skb = segs;
-       }
 
-       err = queue_userspace_packets(dp, skb, upcall_info);
-       if (segs) {
-               struct sk_buff *next;
-               /* Free GSO-segments */
-               do {
-                       next = segs->next;
-                       kfree_skb(segs);
-               } while ((segs = next) != NULL);
-       }
+       forward_ip_summed(skb, true);
 
+       if (!skb_is_gso(skb))
+               err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+       else
+               err = queue_gso_packets(dp_ifindex, skb, upcall_info);
        if (err)
                goto err;
 
@@ -401,20 +391,51 @@ err:
        return err;
 }
 
-/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
- * 'upcall_info'.  There will be only one packet unless we broke up a GSO
- * packet.
- */
-static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
                                   const struct dp_upcall_info *upcall_info)
 {
-       int dp_ifindex;
+       struct dp_upcall_info later_info;
+       struct sw_flow_key later_key;
+       struct sk_buff *segs, *nskb;
+       int err;
 
-       dp_ifindex = get_dpifindex(dp);
-       if (!dp_ifindex)
-               return -ENODEV;
+       segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+       if (IS_ERR(segs))
+               return PTR_ERR(segs);
+
+       /* Queue all of the segments. */
+       skb = segs;
+       do {
+               err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+               if (err)
+                       break;
 
+               if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
+                       later_key = *upcall_info->key;
+                       later_key.ip.tos_frag &= ~OVS_FRAG_TYPE_MASK;
+                       later_key.ip.tos_frag |= OVS_FRAG_TYPE_LATER;
+
+                       later_info = *upcall_info;
+                       later_info.key = &later_key;
+                       upcall_info = &later_info;
+               }
+       } while ((skb = skb->next));
+
+       /* Free all of the segments. */
+       skb = segs;
        do {
+               nskb = skb->next;
+               if (err)
+                       kfree_skb(skb);
+               else
+                       consume_skb(skb);
+       } while ((skb = nskb));
+       return err;
+}
+
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
+                                 const struct dp_upcall_info *upcall_info)
+{
                struct ovs_header *upcall;
                struct sk_buff *user_skb; /* to be queued to userspace */
                struct nlattr *nla;
@@ -456,13 +477,7 @@ static int queue_userspace_packets(struct datapath *dp, 
struct sk_buff *skb,
                else
                        skb_copy_bits(skb, 0, nla_data(nla), skb->len);
 
-               err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
-               if (err)
-                       return err;
-
-       } while ((skb = skb->next));
-
-       return 0;
+       return genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
 }
 
 /* Called with genl_mutex. */
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev

Reply via email to