Recirculation implementation for Linux kernel data path.

Signed-off-by: Andy Zhou <az...@nicira.com>
---
 datapath/actions.c      |   55 ++++++++++++++++++++++++++++++++++++++++++++++-
 datapath/datapath.c     |   35 ++++++++++++++++++------------
 datapath/datapath.h     |    3 ++-
 datapath/flow.h         |    4 ++++
 datapath/flow_netlink.c |   55 ++++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 135 insertions(+), 17 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 0b66e7c..81ba67e 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -506,6 +506,37 @@ static int execute_set_action(struct sk_buff *skb,
        return err;
 }
 
+/* Store the recirculation id (and, when requested, an L4 hash) from
+ * 'recirc_attr' in the flow key of 'skb', then run 'skb' through flow
+ * processing again.  Frees 'skb' and returns -ENODEV when the key's
+ * input port has no vport. */
+static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
+                                const struct nlattr *recirc_attr)
+{
+       const struct ovs_action_recirc *recirc = nla_data(recirc_attr);
+       struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key;
+       struct vport *in_vport;
+       u32 hash = 0;
+
+       pkt_key->recirc_id = recirc->recirc_id;
+       if (recirc->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
+               /* skb_get_rxhash() can return 0 when there is a packet
+                * error; 0 is also stored when no hash is requested. */
+               hash = skb_get_rxhash(skb);
+               hash = hash ? hash : 0x1;
+       }
+       pkt_key->dp_hash = hash;
+
+       in_vport = ovs_vport_rcu(dp, pkt_key->phy.in_port);
+       if (likely(in_vport)) {
+               ovs_dp_process_packet_with_key(in_vport, skb);
+               return 0;
+       }
+
+       kfree_skb(skb);
+       return -ENODEV;
+}
+
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        const struct nlattr *attr, int len, bool keep_skb)
@@ -546,12 +577,34 @@ static int do_execute_actions(struct datapath *dp, struct 
sk_buff *skb,
                        err = pop_vlan(skb);
                        break;
 
+               case OVS_ACTION_ATTR_RECIRC: {
+                       struct sk_buff *recirc_skb;
+                       const bool last_action = (a->nla_len == rem);
+
+                       /* execute_recirc() hands off or frees its skb.  Give
+                        * it a clone if actions follow or caller keeps 'skb'. */
+                       if (!last_action || keep_skb)
+                               recirc_skb = skb_clone(skb, GFP_ATOMIC);
+                       else
+                               recirc_skb = skb;
+
+                       /* skb_clone() returns NULL when allocation fails. */
+                       err = recirc_skb ? execute_recirc(dp, recirc_skb, a)
+                                        : -ENOMEM;
+
+                       /* Stop on error or when recirc was the last action. */
+                       if (last_action || err)
+                               return err;
+                       break;
+               }
+
                case OVS_ACTION_ATTR_SET:
                        err = execute_set_action(skb, nla_data(a));
                        break;
 
                case OVS_ACTION_ATTR_SAMPLE:
                        err = sample(dp, skb, a);
+                       if (unlikely(err)) /* skb already freed. */
+                               return err;
                        break;
                }
 
diff --git a/datapath/datapath.c b/datapath/datapath.c
index c6d42db..e2a2693 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -240,33 +240,24 @@ void ovs_dp_detach_port(struct vport *p)
        ovs_vport_del(p);
 }
 
-/* Must be called with rcu_read_lock. */
-void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
+void ovs_dp_process_packet_with_key(const struct vport *p, struct sk_buff *skb)
 {
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct dp_stats_percpu *stats;
-       struct sw_flow_key key;
+       struct sw_flow_key *key = OVS_CB(skb)->pkt_key;
        u64 *stats_counter;
        u32 n_mask_hit;
-       int error;
 
        stats = this_cpu_ptr(dp->stats_percpu);
 
-       /* Extract flow from 'skb' into 'key'. */
-       error = ovs_flow_extract(skb, p->port_no, &key);
-       if (unlikely(error)) {
-               kfree_skb(skb);
-               return;
-       }
-
        /* Look up flow. */
-       flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
+       flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
 
                upcall.cmd = OVS_PACKET_CMD_MISS;
-               upcall.key = &key;
+               upcall.key = key;
                upcall.userdata = NULL;
                upcall.portid = p->upcall_portid;
                ovs_dp_upcall(dp, skb, &upcall);
@@ -276,7 +267,6 @@ void ovs_dp_process_received_packet(struct vport *p, struct 
sk_buff *skb)
        }
 
        OVS_CB(skb)->flow = flow;
-       OVS_CB(skb)->pkt_key = &key;
 
        ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
        ovs_execute_actions(dp, skb);
@@ -290,6 +280,23 @@ out:
        u64_stats_update_end(&stats->sync);
 }
 
+/* Entry point for a packet received on vport 'p': extracts the flow key from
+ * 'skb' and passes it on for processing.  Must be called with rcu_read_lock. */
+void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
+{
+       struct sw_flow_key key;
+
+       /* A packet whose flow key cannot be extracted is dropped. */
+       if (unlikely(ovs_flow_extract(skb, p->port_no, &key))) {
+               kfree_skb(skb);
+               return;
+       }
+
+       /* 'key' is on this stack frame: valid only during the call below. */
+       OVS_CB(skb)->pkt_key = &key;
+       ovs_dp_process_packet_with_key(p, skb);
+}
+
 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct dp_upcall_info *upcall_info)
 {
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 40e0f90..d3b06ee 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -188,6 +188,7 @@ extern struct genl_family dp_vport_genl_family;
 extern struct genl_multicast_group ovs_dp_vport_multicast_group;
 
 void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
+void ovs_dp_process_packet_with_key(const struct vport *, struct sk_buff *);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
                  const struct dp_upcall_info *);
diff --git a/datapath/flow.h b/datapath/flow.h
index 5043a6e..3ccaec5 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -68,6 +68,8 @@ static inline void ovs_flow_tun_key_init(struct 
ovs_key_ipv4_tunnel *tun_key,
 }
 
 struct sw_flow_key {
+       u32 dp_hash;                    /* Datapath computed hash value */
+       u32 recirc_id;                  /* Recirculation ID */
        struct ovs_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
        struct {
                u32     priority;       /* Packet QoS priority. */
@@ -190,4 +192,6 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
 
 int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
 
+u32 ovs_flow_hash_symmetric_l4(struct sw_flow_key *);
+
 #endif /* flow.h */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 5c32cd0..207bdeb 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -123,14 +123,20 @@ static bool match_validate(const struct sw_flow_match 
*match,
                        | (1ULL << OVS_KEY_ATTR_ICMP)
                        | (1ULL << OVS_KEY_ATTR_ICMPV6)
                        | (1ULL << OVS_KEY_ATTR_ARP)
+                       | (1ULL << OVS_KEY_ATTR_DP_HASH)
                        | (1ULL << OVS_KEY_ATTR_ND));
 
        /* Always allowed mask fields. */
        mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
                       | (1ULL << OVS_KEY_ATTR_IN_PORT)
-                      | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
+                      | (1ULL << OVS_KEY_ATTR_ETHERTYPE)
+                      | (1ULL << OVS_KEY_ATTR_RECIRC_ID));
 
        /* Check key attributes. */
+       if (match->key->dp_hash) {
+               mask_allowed |= (1ULL << OVS_KEY_ATTR_DP_HASH);
+       }
+
        if (match->key->eth.type == htons(ETH_P_ARP)
                        || match->key->eth.type == htons(ETH_P_RARP)) {
                key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
@@ -252,6 +258,8 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
        [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
        [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
        [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+       [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
+       [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
        [OVS_KEY_ATTR_TUNNEL] = -1,
 };
 
@@ -455,6 +463,35 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
                                 const struct nlattr **a, bool is_mask)
 {
+       if (*attrs & (1ULL << OVS_KEY_ATTR_DP_HASH)) {
+               u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
+
+               if (!is_mask && !hash_val) {
+                       OVS_NLERR("Hash value can not be zero\n");
+                       return -EINVAL;
+               }
+
+               SW_FLOW_KEY_PUT(match, dp_hash, hash_val, is_mask);
+               *attrs &= ~(1ULL << OVS_KEY_ATTR_DP_HASH);
+       }
+
+       if (*attrs & (1ULL << OVS_KEY_ATTR_RECIRC_ID)) {
+               u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
+
+               if (is_mask && (recirc_id > 0 && recirc_id < UINT_MAX)) {
+                       OVS_NLERR("Reicrc_id mask is neither wildcard, nor 
exact match \n");
+                       return -EINVAL;
+               }
+
+               SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
+               *attrs &= ~(1ULL << OVS_KEY_ATTR_RECIRC_ID);
+       }
+
+       if (is_mask) {
+               /* Always exact match recirc_id. */
+               SW_FLOW_KEY_PUT(match, recirc_id, UINT_MAX, is_mask);
+       }
+
        if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
                SW_FLOW_KEY_PUT(match, phy.priority,
                          nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
@@ -859,6 +896,8 @@ int ovs_nla_get_flow_metadata(struct sw_flow *flow,
        flow->key.phy.in_port = DP_MAX_PORTS;
        flow->key.phy.priority = 0;
        flow->key.phy.skb_mark = 0;
+       flow->key.dp_hash = 0;
+       flow->key.recirc_id = 0;
        memset(tun_key, 0, sizeof(flow->key.tun_key));
 
        err = parse_flow_nlattrs(attr, a, &attrs);
@@ -882,6 +921,16 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
        struct nlattr *nla, *encap;
        bool is_mask = (swkey != output);
 
+       /* Only output dp_hash and recirc_id if their key values are nonzero. */
+       /* Omitting those key attributes implies zero on the other side. */
+       if (swkey->dp_hash)
+               if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->dp_hash))
+                       goto nla_put_failure;
+
+       if (swkey->recirc_id)
+               if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
+                       goto nla_put_failure;
+
        if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
                goto nla_put_failure;
 
@@ -1418,6 +1467,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
                /* Expected argument lengths, (u32)-1 for variable length. */
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+                       [OVS_ACTION_ATTR_RECIRC] = sizeof(struct 
ovs_action_recirc),
                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct 
ovs_action_push_vlan),
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
@@ -1461,6 +1511,9 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
                                return -EINVAL;
                        break;
 
+               case OVS_ACTION_ATTR_RECIRC:
+                       break;
+
                case OVS_ACTION_ATTR_SET:
                        err = validate_set(a, key, sfa, &skip_copy);
                        if (err)
-- 
1.7.9.5

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to