Until now, the tun_id and in_port have been lost when a packet is sent from
the kernel to userspace and then back to the kernel.  I didn't think that
this was a problem, but recent behavior made me look closer and see that
it makes a difference if sFlow is turned on or if an
ODP_ATTR_ACTION_CONTROLLER action is present.  We could possibly kluge
around those, but for future-proofing it seems better to pass the packet
metadata from userspace to the kernel.  That is what this commit does.

This commit introduces a user-kernel protocol break.  We could avoid that,
if it is desirable, by making ODP_PACKET_ATTR_KEY optional for
ODP_PACKET_CMD_EXECUTE commands.

Signed-off-by: Ben Pfaff <b...@nicira.com>
Bug #4769.
---
 datapath/datapath.c |   10 +++++-
 datapath/flow.c     |   89 +++++++++++++++++++++++++++++++++++++++++---------
 datapath/flow.h     |    2 +
 lib/dpif-linux.c    |    3 ++
 lib/dpif-netdev.c   |    1 +
 lib/dpif-provider.h |   11 ++++--
 lib/dpif.c          |    9 ++++-
 lib/dpif.h          |    6 ++-
 ofproto/ofproto.c   |   30 +++++++++++++----
 9 files changed, 130 insertions(+), 31 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index c948944..9c9740b 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -679,7 +679,8 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, 
struct genl_info *info)
        int err;
 
        err = -EINVAL;
-       if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
+       if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_KEY] ||
+           !a[ODP_PACKET_ATTR_ACTIONS] ||
            nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
                goto exit;
 
@@ -708,6 +709,12 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, 
struct genl_info *info)
        /* Initialize OVS_CB (it came from Netlink so might not be zeroed). */
        memset(OVS_CB(packet), 0, sizeof(struct ovs_skb_cb));
 
+       /* Get flow metadata provided by userspace. */
+       err = flow_metadata_from_nlattrs(&key.in_port, &key.tun_id,
+                                        a[ODP_PACKET_ATTR_KEY]);
+       if (err)
+               goto exit;
+
        err = flow_extract(packet, -1, &key, &is_frag);
        if (err)
                goto exit;
@@ -727,6 +734,7 @@ exit:
 
 static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
        [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
+       [ODP_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
 };
 
diff --git a/datapath/flow.c b/datapath/flow.c
index fe05df3..f0a97a9 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -580,6 +580,23 @@ int flow_cmp(const struct tbl_node *node, void *key2_)
        return !memcmp(key1, key2, sizeof(struct sw_flow_key));
 }
 
+/* The size of the argument for each %ODP_KEY_ATTR_* Netlink attribute.  */
+static const u32 key_lens[ODP_KEY_ATTR_MAX + 1] = {
+       [ODP_KEY_ATTR_TUN_ID] = 8,
+       [ODP_KEY_ATTR_IN_PORT] = 4,
+       [ODP_KEY_ATTR_ETHERNET] = sizeof(struct odp_key_ethernet),
+       [ODP_KEY_ATTR_8021Q] = sizeof(struct odp_key_8021q),
+       [ODP_KEY_ATTR_ETHERTYPE] = 2,
+       [ODP_KEY_ATTR_IPV4] = sizeof(struct odp_key_ipv4),
+       [ODP_KEY_ATTR_IPV6] = sizeof(struct odp_key_ipv6),
+       [ODP_KEY_ATTR_TCP] = sizeof(struct odp_key_tcp),
+       [ODP_KEY_ATTR_UDP] = sizeof(struct odp_key_udp),
+       [ODP_KEY_ATTR_ICMP] = sizeof(struct odp_key_icmp),
+       [ODP_KEY_ATTR_ICMPV6] = sizeof(struct odp_key_icmpv6),
+       [ODP_KEY_ATTR_ARP] = sizeof(struct odp_key_arp),
+       [ODP_KEY_ATTR_ND] = sizeof(struct odp_key_nd),
+};
+
 /**
  * flow_from_nlattrs - parses Netlink attributes into a flow key.
  * @swkey: receives the extracted flow key.
@@ -603,22 +620,6 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, const 
struct nlattr *attr)
 
        prev_type = ODP_KEY_ATTR_UNSPEC;
        nla_for_each_nested(nla, attr, rem) {
-               static const u32 key_lens[ODP_KEY_ATTR_MAX + 1] = {
-                       [ODP_KEY_ATTR_TUN_ID] = 8,
-                       [ODP_KEY_ATTR_IN_PORT] = 4,
-                       [ODP_KEY_ATTR_ETHERNET] = sizeof(struct 
odp_key_ethernet),
-                       [ODP_KEY_ATTR_8021Q] = sizeof(struct odp_key_8021q),
-                       [ODP_KEY_ATTR_ETHERTYPE] = 2,
-                       [ODP_KEY_ATTR_IPV4] = sizeof(struct odp_key_ipv4),
-                       [ODP_KEY_ATTR_IPV6] = sizeof(struct odp_key_ipv6),
-                       [ODP_KEY_ATTR_TCP] = sizeof(struct odp_key_tcp),
-                       [ODP_KEY_ATTR_UDP] = sizeof(struct odp_key_udp),
-                       [ODP_KEY_ATTR_ICMP] = sizeof(struct odp_key_icmp),
-                       [ODP_KEY_ATTR_ICMPV6] = sizeof(struct odp_key_icmpv6),
-                       [ODP_KEY_ATTR_ARP] = sizeof(struct odp_key_arp),
-                       [ODP_KEY_ATTR_ND] = sizeof(struct odp_key_nd),
-               };
-
                const struct odp_key_ethernet *eth_key;
                const struct odp_key_8021q *q_key;
                const struct odp_key_ipv4 *ipv4_key;
@@ -814,6 +815,62 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, const 
struct nlattr *attr)
        return -EINVAL;
 }
 
+/**
+ * flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
+ * @in_port: receives the extracted input port.
+ * @tun_id: receives the extracted tunnel ID.
+ * @key: Netlink attribute holding nested %ODP_KEY_ATTR_* Netlink attribute
+ * sequence.
+ *
+ * This parses a series of Netlink attributes that form a flow key, which must
+ * take the same form accepted by flow_from_nlattrs(), but only enough of it to
+ * get the metadata, that is, the parts of the flow key that cannot be
+ * extracted from the packet itself.
+ */
+int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id,
+                              const struct nlattr *attr)
+{
+       const struct nlattr *nla;
+       u16 prev_type;
+       int rem;
+
+       *tun_id = 0;
+
+       prev_type = ODP_KEY_ATTR_UNSPEC;
+       nla_for_each_nested(nla, attr, rem) {
+                int type = nla_type(nla);
+
+                if (type > ODP_KEY_ATTR_MAX || nla_len(nla) != key_lens[type])
+                        return -EINVAL;
+
+               switch (TRANSITION(prev_type, type)) {
+               case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_TUN_ID):
+                       *tun_id = nla_get_be64(nla);
+                       break;
+
+               case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_IN_PORT):
+               case TRANSITION(ODP_KEY_ATTR_TUN_ID, ODP_KEY_ATTR_IN_PORT):
+                       if (nla_get_u32(nla) >= DP_MAX_PORTS)
+                               return -EINVAL;
+                       *in_port = nla_get_u32(nla);
+                       break;
+
+               default:
+                       goto done;
+               }
+
+               prev_type = type;
+       }
+       if (rem)
+               return -EINVAL;
+
+done:
+       if (prev_type == ODP_KEY_ATTR_UNSPEC ||
+           prev_type == ODP_KEY_ATTR_TUN_ID)
+               return -EINVAL;
+       return 0;
+}
+
 int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 {
        struct odp_key_ethernet *eth_key;
diff --git a/datapath/flow.h b/datapath/flow.h
index a40073a..0b2e0a4 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -127,6 +127,8 @@ int flow_cmp(const struct tbl_node *, void *target);
 
 int flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
 int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *);
+int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id,
+                              const struct nlattr *);
 
 static inline struct sw_flow *flow_cast(const struct tbl_node *node)
 {
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 509174c..d79ef91 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -33,6 +33,7 @@
 #include <unistd.h>
 
 #include "dpif-provider.h"
+#include "dynamic-string.h"
 #include "netdev.h"
 #include "netdev-vport.h"
 #include "netlink-socket.h"
@@ -698,6 +699,7 @@ dpif_linux_flow_dump_done(const struct dpif *dpif 
OVS_UNUSED, void *state_)
 
 static int
 dpif_linux_execute(struct dpif *dpif_,
+                   const struct nlattr *key, size_t key_len,
                    const struct nlattr *actions, size_t actions_len,
                    const struct ofpbuf *packet)
 {
@@ -715,6 +717,7 @@ dpif_linux_execute(struct dpif *dpif_,
     execute->dp_ifindex = dpif->dp_ifindex;
 
     nl_msg_put_unspec(buf, ODP_PACKET_ATTR_PACKET, packet->data, packet->size);
+    nl_msg_put_unspec(buf, ODP_PACKET_ATTR_KEY, key, key_len);
     nl_msg_put_unspec(buf, ODP_PACKET_ATTR_ACTIONS, actions, actions_len);
 
     error = nl_sock_transact(genl_sock, buf, NULL);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 486ba48..daa260d 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -944,6 +944,7 @@ dpif_netdev_flow_dump_done(const struct dpif *dpif 
OVS_UNUSED, void *state_)
 
 static int
 dpif_netdev_execute(struct dpif *dpif,
+                    const struct nlattr *k OVS_UNUSED, size_t k_len OVS_UNUSED,
                     const struct nlattr *actions, size_t actions_len,
                     const struct ofpbuf *packet)
 {
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 91074d5..e0d9151 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -283,9 +283,14 @@ struct dpif_class {
     int (*flow_dump_done)(const struct dpif *dpif, void *state);
 
     /* Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet
-     * frame specified in 'packet'. */
-    int (*execute)(struct dpif *dpif, const struct nlattr *actions,
-                   size_t actions_len, const struct ofpbuf *packet);
+     * frame specified in 'packet' taken from the flow specified in the
+     * 'key_len' bytes of 'key'.  ('key' is mostly redundant with 'packet', but
+     * it contains some metadata that cannot be recovered from 'packet', such
+     * as tun_id and in_port.) */
+    int (*execute)(struct dpif *dpif,
+                   const struct nlattr *key, size_t key_len,
+                   const struct nlattr *actions, size_t actions_len,
+                   const struct ofpbuf *packet);
 
     /* Retrieves 'dpif''s "listen mask" into '*listen_mask'.  A 1-bit of value
      * 2**X set in '*listen_mask' indicates that 'dpif' will receive messages
diff --git a/lib/dpif.c b/lib/dpif.c
index a754613..649436e 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -913,11 +913,15 @@ dpif_flow_dump_done(struct dpif_flow_dump *dump)
 }
 
 /* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on
- * the Ethernet frame specified in 'packet'.
+ * the Ethernet frame specified in 'packet' taken from the flow specified in
+ * the 'key_len' bytes of 'key'.  ('key' is mostly redundant with 'packet', but
+ * it contains some metadata that cannot be recovered from 'packet', such as
+ * tun_id and in_port.)
  *
  * Returns 0 if successful, otherwise a positive errno value. */
 int
 dpif_execute(struct dpif *dpif,
+             const struct nlattr *key, size_t key_len,
              const struct nlattr *actions, size_t actions_len,
              const struct ofpbuf *buf)
 {
@@ -925,7 +929,8 @@ dpif_execute(struct dpif *dpif,
 
     COVERAGE_INC(dpif_execute);
     if (actions_len > 0) {
-        error = dpif->dpif_class->execute(dpif, actions, actions_len, buf);
+        error = dpif->dpif_class->execute(dpif, key, key_len,
+                                          actions, actions_len, buf);
     } else {
         error = 0;
     }
diff --git a/lib/dpif.h b/lib/dpif.h
index 8872a2e..f0f9801 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -146,8 +146,10 @@ bool dpif_flow_dump_next(struct dpif_flow_dump *,
                          const struct dpif_flow_stats **);
 int dpif_flow_dump_done(struct dpif_flow_dump *);
 
-int dpif_execute(struct dpif *, const struct nlattr *actions,
-                 size_t actions_len, const struct ofpbuf *);
+int dpif_execute(struct dpif *,
+                 const struct nlattr *key, size_t key_len,
+                 const struct nlattr *actions, size_t actions_len,
+                 const struct ofpbuf *);
 
 enum dpif_upcall_type {
     DPIF_UC_MISS,               /* Miss in flow table. */
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index aac9edd..6ba0030 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -1464,17 +1464,22 @@ ofproto_send_packet(struct ofproto *p, const struct 
flow *flow,
                     const union ofp_action *actions, size_t n_actions,
                     const struct ofpbuf *packet)
 {
+    struct ofpbuf key, *odp_actions;
+    struct odputil_keybuf keybuf;
     struct action_xlate_ctx ctx;
-    struct ofpbuf *odp_actions;
 
     action_xlate_ctx_init(&ctx, p, flow, packet);
     /* Always xlate packets originated in this function. */
     ctx.check_special = false;
     odp_actions = xlate_actions(&ctx, actions, n_actions);
 
+    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&key, flow);
+
     /* XXX Should we translate the dpif_execute() errno value into an OpenFlow
      * error code? */
-    dpif_execute(p->dpif, odp_actions->data, odp_actions->size, packet);
+    dpif_execute(p->dpif, key.data, key.size,
+                 odp_actions->data, odp_actions->size, packet);
 
     ofpbuf_delete(odp_actions);
 
@@ -2132,9 +2137,15 @@ execute_odp_actions(struct ofproto *ofproto, const 
struct flow *flow,
 
         return true;
     } else {
+        struct odputil_keybuf keybuf;
+        struct ofpbuf key;
         int error;
 
-        error = dpif_execute(ofproto->dpif, odp_actions, actions_len, packet);
+        ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+        odp_flow_key_from_flow(&key, flow);
+
+        error = dpif_execute(ofproto->dpif, key.data, key.size,
+                             odp_actions, actions_len, packet);
         ofpbuf_delete(packet);
         return !error;
     }
@@ -3209,8 +3220,9 @@ handle_packet_out(struct ofconn *ofconn, const struct 
ofp_header *oh)
 {
     struct ofproto *p = ofconn->ofproto;
     struct ofp_packet_out *opo;
-    struct ofpbuf payload, *buffer;
+    struct ofpbuf payload, *buffer, key;
     union ofp_action *ofp_actions;
+    struct odputil_keybuf keybuf;
     struct action_xlate_ctx ctx;
     struct ofpbuf *odp_actions;
     struct ofpbuf request;
@@ -3258,10 +3270,14 @@ handle_packet_out(struct ofconn *ofconn, const struct 
ofp_header *oh)
         goto exit;
     }
 
+    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&key, &flow);
+
     /* Send. */
     action_xlate_ctx_init(&ctx, p, &flow, &payload);
     odp_actions = xlate_actions(&ctx, ofp_actions, n_ofp_actions);
-    dpif_execute(p->dpif, odp_actions->data, odp_actions->size, &payload);
+    dpif_execute(p->dpif, key.data, key.size,
+                 odp_actions->data, odp_actions->size, &payload);
     ofpbuf_delete(odp_actions);
 
 exit:
@@ -4463,8 +4479,8 @@ handle_miss_upcall(struct ofproto *p, struct dpif_upcall 
*upcall)
 
         ofpbuf_init(&odp_actions, 32);
         nl_msg_put_u32(&odp_actions, ODP_ACTION_ATTR_OUTPUT, ODPP_LOCAL);
-        dpif_execute(p->dpif, odp_actions.data, odp_actions.size,
-                     upcall->packet);
+        dpif_execute(p->dpif, upcall->key, upcall->key_len,
+                     odp_actions.data, odp_actions.size, upcall->packet);
         ofpbuf_uninit(&odp_actions);
     }
 
-- 
1.7.1


_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev_openvswitch.org

Reply via email to