Thanks for the review, most of this is easy to address On Oct 10, 2012, at 6:23 PM, Pravin Shelar <pshe...@nicira.com> wrote:
> Patch looks good, I have few comments inlined. > > > On Tue, Oct 9, 2012 at 11:49 AM, Kyle Mestery <kmest...@cisco.com> wrote: >> This is a first pass at providing a tun_key which can be >> used as the basis for flow-based tunnelling. The >> tun_key includes and replaces the tun_id in both struct >> ovs_skb_cb and struct sw_tun_key. >> >> This patch allows all existing tun_id behaviour to still work. Existing >> users of tun_id are redirected to tun_key->tun_id to retain compatibility. >> However, when the userspace code is updated to make use of the new tun_key, >> the old behaviour will be deprecated and removed. >> >> NOTE: With these changes, the tunneling code no longer assumes input and >> output keys are symmetric. If they are not, PMTUD needs to be disabled >> for tunneling to work. >> >> Signed-off-by: Kyle Mestery <kmest...@cisco.com> >> Cc: Simon Horman <ho...@verge.net.au> >> Cc: Jesse Gross <je...@nicira.com> >> --- >> V6: >> - Fix more comments addressed from Jesse. >> V5: >> - Address another round of comments from Jesse. >> V4: >> - Address 2 comments from Jesse: >> - When processing actions, if OVS_CB(skb)->tun_key is NULL, point it at one >> on the stack temporarily. This goes away when we remove the ability to set >> tun_id outside the scope of tun_key. >> - Move tun_key to the end of struct sw_flow_key. >> V3: >> - Fix issues found during review by Jesse. >> - Add a NEWS entry around tunnel code no longer assuming symmetric input and >> output tunnel keys. >> >> V2: >> - Fix blank line addition/removal found by Simon. >> - Fix hex printing output found by Simon. >> --- >> NEWS | 3 ++ >> datapath/actions.c | 38 +++++++++++++---- >> datapath/datapath.c | 10 ++++- >> datapath/datapath.h | 5 ++- >> datapath/flow.c | 64 ++++++++++++++++++++++++---- >> datapath/flow.h | 12 ++++-- >> datapath/tunnel.c | 101 >> ++++++++++++++++++++++++++++---------------- >> datapath/tunnel.h | 15 ++++++- >> datapath/vport-capwap.c | 12 +++--- >> datapath/vport-gre.c | 15 ++++--- >> datapath/vport.c | 2 +- >> include/linux/openvswitch.h | 18 +++++++- >> lib/dpif-netdev.c | 1 + >> lib/odp-util.c | 15 ++++++- >> lib/odp-util.h | 3 +- >> 15 files changed, 235 insertions(+), 79 deletions(-) >> >> diff --git a/NEWS b/NEWS >> index 29fd9f3..ae831e3 100644 >> --- a/NEWS >> +++ b/NEWS >> @@ -1,5 +1,8 @@ >> post-v1.8.0 >> ------------------------ >> + - The tunneling code no longer assumes input and output keys are >> symmetric. >> + If they are not, PMTUD needs to be disabled for tunneling to work. >> Note >> + this only applies to flow-based keys. >> - FreeBSD is now a supported platform, thanks to code contributions from >> Gaetano Catalli, Ed Maste, and Giuseppe Lettieri. >> - ovs-bugtool: New --ovs option to report only OVS related information. >> diff --git a/datapath/actions.c b/datapath/actions.c >> index ec9b595..fa8c10d 100644 >> --- a/datapath/actions.c >> +++ b/datapath/actions.c >> @@ -37,7 +37,8 @@ >> #include "vport.h" >> >> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, >> - const struct nlattr *attr, int len, bool keep_skb); >> + const struct nlattr *attr, int len, >> + struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb); >> >> static int make_writable(struct sk_buff *skb, int write_len) >> { >> @@ -329,11 +330,14 @@ static int sample(struct datapath *dp, struct sk_buff >> *skb, >> } >> >> return do_execute_actions(dp, skb, nla_data(acts_list), >> - nla_len(acts_list), true); >> + nla_len(acts_list), >> + OVS_CB(skb)->tun_key, >> + true); >> } >> >> static int execute_set_action(struct sk_buff *skb, >> - const struct nlattr *nested_attr) >> + const struct nlattr *nested_attr, >> + struct ovs_key_ipv4_tunnel *tun_key) >> { >> int err = 0; >> >> @@ -343,7 +347,21 @@ static int execute_set_action(struct sk_buff *skb, >> break; >> >> case OVS_KEY_ATTR_TUN_ID: >> - OVS_CB(skb)->tun_id = nla_get_be64(nested_attr); >> + if (OVS_CB(skb)->tun_key == NULL) { >> + /* If tun_key is NULL for this skb, assign it to >> + * a value the caller passed in for action processing >> + * and output. This can disappear once we drop >> support >> + * for setting tun_id outside of tun_key. >> + */ >> + memset(tun_key, 0, sizeof(struct >> ovs_key_ipv4_tunnel)); >> + OVS_CB(skb)->tun_key = tun_key; >> + } >> + >> + OVS_CB(skb)->tun_key->tun_id = nla_get_be64(nested_attr); >> + break; >> + >> + case OVS_KEY_ATTR_IPV4_TUNNEL: >> + OVS_CB(skb)->tun_key = nla_data(nested_attr); >> break; >> >> case OVS_KEY_ATTR_ETHERNET: >> @@ -368,7 +386,8 @@ static int execute_set_action(struct sk_buff *skb, >> >> /* Execute a list of actions against 'skb'. */ >> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, >> - const struct nlattr *attr, int len, bool keep_skb) >> + const struct nlattr *attr, int len, >> + struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb) >> { >> /* Every output action needs a separate clone of 'skb', but the common >> * case is just a single output action, so that doing a clone and >> @@ -407,7 +426,7 @@ static int do_execute_actions(struct datapath *dp, >> struct sk_buff *skb, >> break; >> >> case OVS_ACTION_ATTR_SET: >> - err = execute_set_action(skb, nla_data(a)); >> + err = execute_set_action(skb, nla_data(a), tun_key); >> break; >> >> case OVS_ACTION_ATTR_SAMPLE: >> @@ -458,6 +477,9 @@ int ovs_execute_actions(struct datapath *dp, struct >> sk_buff *skb) >> struct sw_flow_actions *acts = >> rcu_dereference(OVS_CB(skb)->flow->sf_acts); >> struct loop_counter *loop; >> int error; >> + struct ovs_key_ipv4_tunnel tun_key; >> + >> + memset(&tun_key, 0, sizeof(tun_key)); >> > tun key will zeroed while executing tunnel set action, Do we still > need to zero it here? > >> /* Check whether we've looped too much. */ >> loop = &__get_cpu_var(loop_counters); >> @@ -469,9 +491,9 @@ int ovs_execute_actions(struct datapath *dp, struct >> sk_buff *skb) >> goto out_loop; >> } >> >> - OVS_CB(skb)->tun_id = 0; >> + OVS_CB(skb)->tun_key = NULL; >> error = do_execute_actions(dp, skb, acts->actions, >> - acts->actions_len, false); >> + acts->actions_len, &tun_key, false); >> >> /* Check whether sub-actions looped too much. */ >> if (unlikely(loop->looping)) >> diff --git a/datapath/datapath.c b/datapath/datapath.c >> index a6915fb..d8a198e 100644 >> --- a/datapath/datapath.c >> +++ b/datapath/datapath.c >> @@ -587,12 +587,20 @@ static int validate_set(const struct nlattr *a, >> >> switch (key_type) { >> const struct ovs_key_ipv4 *ipv4_key; >> + const struct ovs_key_ipv4_tunnel *tun_key; >> >> case OVS_KEY_ATTR_PRIORITY: >> case OVS_KEY_ATTR_TUN_ID: >> case OVS_KEY_ATTR_ETHERNET: >> break; >> >> + case OVS_KEY_ATTR_IPV4_TUNNEL: >> + tun_key = nla_data(ovs_key); >> + if (!tun_key->ipv4_dst) { >> + return -EINVAL; >> + } >> + break; >> + >> case OVS_KEY_ATTR_IPV4: >> if (flow_key->eth.type != htons(ETH_P_IP)) >> return -EINVAL; >> @@ -785,7 +793,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, >> struct genl_info *info) >> >> err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, >> &flow->key.phy.in_port, >> - &flow->key.phy.tun_id, >> + &flow->key.tun.tun_key, >> a[OVS_PACKET_ATTR_KEY]); >> if (err) >> goto err_flow_put; >> diff --git a/datapath/datapath.h b/datapath/datapath.h >> index affbf0e..c5df12d 100644 >> --- a/datapath/datapath.h >> +++ b/datapath/datapath.h >> @@ -96,7 +96,8 @@ struct datapath { >> /** >> * struct ovs_skb_cb - OVS data in skb CB >> * @flow: The flow associated with this packet. May be %NULL if no flow. >> - * @tun_id: ID of the tunnel that encapsulated this packet. It is 0 if the >> + * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the >> + * packet is not being tunneled. >> * @ip_summed: Consistently stores L4 checksumming status across different >> * kernel versions. >> * @csum_start: Stores the offset from which to start checksumming >> independent >> @@ -107,7 +108,7 @@ struct datapath { >> */ >> struct ovs_skb_cb { >> struct sw_flow *flow; >> - __be64 tun_id; >> + struct ovs_key_ipv4_tunnel *tun_key; >> #ifdef NEED_CSUM_NORMALIZE >> enum csum_type ip_summed; >> u16 csum_start; >> diff --git a/datapath/flow.c b/datapath/flow.c >> index d07337c..376f4be 100644 >> --- a/datapath/flow.c >> +++ b/datapath/flow.c >> @@ -629,7 +629,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, >> struct sw_flow_key *key, >> memset(key, 0, sizeof(*key)); >> >> key->phy.priority = skb->priority; >> - key->phy.tun_id = OVS_CB(skb)->tun_id; >> + if (OVS_CB(skb)->tun_key) >> + key->tun.tun_key = *OVS_CB(skb)->tun_key; >> key->phy.in_port = in_port; >> >> skb_reset_mac_header(skb); >> @@ -847,6 +848,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { >> >> /* Not upstream. */ >> [OVS_KEY_ATTR_TUN_ID] = sizeof(__be64), >> + [OVS_KEY_ATTR_IPV4_TUNNEL] = sizeof(struct ovs_key_ipv4_tunnel), >> }; >> >> static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, >> @@ -1022,9 +1024,30 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, >> int *key_lenp, >> swkey->phy.in_port = DP_MAX_PORTS; >> } >> >> - if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { >> - swkey->phy.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); >> + /* OVS_KEY_ATTR_TUN_ID and OVS_KEY_ATTR_IPV4_TUNEL must both arrive >> + * together. >> + */ >> + if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID) && >> + attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) { >> + struct ovs_key_ipv4_tunnel *tun_key; >> + __be64 tun_id = 0; >> + tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]); >> + tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); >> + >> + if (tun_id != tun_key->tun_id) >> + return -EINVAL; >> + >> + swkey->tun.tun_key = *tun_key; >> attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID); >> + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL); >> + } else if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { >> + swkey->tun.tun_key.tun_id = >> nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); >> + attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID); >> + } else if (attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) { >> + struct ovs_key_ipv4_tunnel *tun_key; >> + tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]); >> + swkey->tun.tun_key = *tun_key; >> + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL); >> } > > While validating set action OVS_KEY_ATTR_IPV4_TUNNEL, we are checking > for ipv4_dst, I think we need similar check here. > >> >> /* Data attributes. */ >> @@ -1162,14 +1185,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, >> int *key_lenp, >> * get the metadata, that is, the parts of the flow key that cannot be >> * extracted from the packet itself. >> */ >> -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 >> *tun_id, >> +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, >> + struct ovs_key_ipv4_tunnel *tun_key, >> const struct nlattr *attr) >> { >> const struct nlattr *nla; >> int rem; >> + __be64 tun_id = 0; >> >> *in_port = DP_MAX_PORTS; >> - *tun_id = 0; >> + memset(tun_key, 0, sizeof(*tun_key)); >> *priority = 0; >> >> nla_for_each_nested(nla, attr, rem) { >> @@ -1185,7 +1210,20 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 >> *in_port, __be64 *tun_id, >> break; >> >> case OVS_KEY_ATTR_TUN_ID: >> - *tun_id = nla_get_be64(nla); >> + tun_id = nla_get_be64(nla); >> + if ((tun_key->tun_flags & FLOW_TNL_F_KEY) && >> + tun_key->tun_id != tun_id) >> + return -EINVAL; >> + break; >> + >> + case OVS_KEY_ATTR_IPV4_TUNNEL: >> + if (tun_key->tun_flags & FLOW_TNL_F_KEY) >> + tun_id = tun_key->tun_id; >> + memcpy(tun_key, nla_data(nla), >> + sizeof(*tun_key)); >> + if ((tun_key->tun_flags & FLOW_TNL_F_KEY) && >> + tun_id != tun_key->tun_id) >> + return -EINVAL; >> break; >> >> case OVS_KEY_ATTR_IN_PORT: >> @@ -1210,10 +1248,20 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key >> *swkey, struct sk_buff *skb) >> nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) >> goto nla_put_failure; >> >> - if (swkey->phy.tun_id != cpu_to_be64(0) && >> - nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id)) >> + if ((swkey->tun.tun_key.tun_flags & FLOW_TNL_F_KEY) && >> + nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, >> swkey->tun.tun_key.tun_id)) >> goto nla_put_failure; >> >> + if (swkey->tun.tun_key.ipv4_dst) { >> + struct ovs_key_ipv4_tunnel *tun_key; >> + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4_TUNNEL, >> + sizeof(*tun_key)); >> + if (!nla) >> + goto nla_put_failure; >> + tun_key = nla_data(nla); >> + *tun_key = swkey->tun.tun_key; >> + } >> + > > Why are we sending both OVS_KEY_ATTR_TUN_ID and > OVS_KEY_ATTR_IPV4_TUNNEL attributed here? > we could first check for ipv4_dst else for FLOW_TNL_F_KEY in flow key. > >> if (swkey->phy.in_port != DP_MAX_PORTS && >> nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) >> goto nla_put_failure; >> diff --git a/datapath/flow.h b/datapath/flow.h >> index 02c563a..4430b32 100644 >> --- a/datapath/flow.h >> +++ b/datapath/flow.h >> @@ -42,10 +42,12 @@ struct sw_flow_actions { >> >> struct sw_flow_key { >> struct { >> - __be64 tun_id; /* Encapsulating tunnel ID. */ >> u32 priority; /* Packet QoS priority. */ >> u16 in_port; /* Input switch port (or >> DP_MAX_PORTS). */ >> } phy; >> + struct { >> + struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel >> key. */ >> + } tun; >> struct { >> u8 src[ETH_ALEN]; /* Ethernet source address. */ >> u8 dst[ETH_ALEN]; /* Ethernet destination address. */ >> @@ -150,6 +152,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); >> * ------ --- ------ ----- >> * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 >> * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 >> + * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28 >> * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 >> * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 >> * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) >> @@ -160,14 +163,15 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); >> * OVS_KEY_ATTR_ICMPV6 2 2 4 8 >> * OVS_KEY_ATTR_ND 28 -- 4 32 >> * ------------------------------------------------- >> - * total 156 >> + * total 184 >> */ >> -#define FLOW_BUFSIZE 156 >> +#define FLOW_BUFSIZE 184 >> >> int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); >> int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, >> const struct nlattr *); >> -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 >> *tun_id, >> +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, >> + struct ovs_key_ipv4_tunnel *tun_key, >> const struct nlattr *); >> >> #define MAX_ACTIONS_BUFSIZE (16 * 1024) >> diff --git a/datapath/tunnel.c b/datapath/tunnel.c >> index d651c11..739f098 100644 >> --- a/datapath/tunnel.c >> +++ b/datapath/tunnel.c >> @@ -367,9 +367,9 @@ struct vport *ovs_tnl_find_port(struct net *net, __be32 >> saddr, __be32 daddr, >> return NULL; > ñ> } >> >> -static void ecn_decapsulate(struct sk_buff *skb, u8 tos) >> +static void ecn_decapsulate(struct sk_buff *skb) >> { >> - if (unlikely(INET_ECN_is_ce(tos))) { >> + if (unlikely(INET_ECN_is_ce(OVS_CB(skb)->tun_key->ipv4_tos))) { >> __be16 protocol = skb->protocol; >> >> skb_set_network_header(skb, ETH_HLEN); >> @@ -416,7 +416,7 @@ static void ecn_decapsulate(struct sk_buff *skb, u8 tos) >> * - skb->csum does not include the inner Ethernet header. >> * - The layer pointers are undefined. >> */ >> -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos) >> +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb) >> { >> struct ethhdr *eh; >> >> @@ -433,7 +433,7 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff >> *skb, u8 tos) >> skb_clear_rxhash(skb); >> secpath_reset(skb); >> >> - ecn_decapsulate(skb, tos); >> + ecn_decapsulate(skb); >> vlan_set_tci(skb, 0); >> >> if (unlikely(compute_ip_summed(skb, false))) { >> @@ -613,7 +613,7 @@ static void ipv6_build_icmp(struct sk_buff *skb, struct >> sk_buff *nskb, >> >> bool ovs_tnl_frag_needed(struct vport *vport, >> const struct tnl_mutable_config *mutable, >> - struct sk_buff *skb, unsigned int mtu, __be64 >> flow_key) >> + struct sk_buff *skb, unsigned int mtu) >> { >> unsigned int eth_hdr_len = ETH_HLEN; >> unsigned int total_length = 0, header_length = 0, payload_length; >> @@ -697,17 +697,6 @@ bool ovs_tnl_frag_needed(struct vport *vport, >> ipv6_build_icmp(skb, nskb, mtu, payload_length); >> #endif >> >> - /* >> - * Assume that flow based keys are symmetric with respect to input >> - * and output and use the key that we were going to put on the >> - * outgoing packet for the fake received packet. If the keys are >> - * not symmetric then PMTUD needs to be disabled since we won't have >> - * any way of synthesizing packets. >> - */ >> - if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) == >> - (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) >> - OVS_CB(nskb)->tun_id = flow_key; >> - >> if (unlikely(compute_ip_summed(nskb, false))) { >> kfree_skb(nskb); >> return false; >> @@ -723,12 +712,20 @@ static bool check_mtu(struct sk_buff *skb, >> const struct tnl_mutable_config *mutable, >> const struct rtable *rt, __be16 *frag_offp) >> { >> - bool df_inherit = mutable->flags & TNL_F_DF_INHERIT; >> + bool df_inherit; >> bool pmtud = mutable->flags & TNL_F_PMTUD; >> - __be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : >> 0; >> + __be16 frag_off; >> int mtu = 0; >> unsigned int packet_length = skb->len - ETH_HLEN; >> >> + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) { >> + df_inherit = false; >> + frag_off = OVS_CB(skb)->tun_key->tun_flags & >> FLOW_TNL_F_DONT_FRAGMENT; >> + } else { >> + df_inherit = mutable->flags & TNL_F_DF_INHERIT; >> + frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) >> : 0; >> + } >> + > Why not set pmtud to false incase of flow-based tunneling like you did > it for df_inherit? > >> /* Allow for one level of tagging in the packet length. */ >> if (!vlan_tx_tag_present(skb) && >> eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)) >> @@ -760,8 +757,7 @@ static bool check_mtu(struct sk_buff *skb, >> mtu = max(mtu, IP_MIN_MTU); >> >> if (packet_length > mtu && >> - ovs_tnl_frag_needed(vport, mutable, skb, mtu, >> - OVS_CB(skb)->tun_id)) >> + ovs_tnl_frag_needed(vport, mutable, skb, mtu)) >> return false; >> } >> } >> @@ -777,8 +773,7 @@ static bool check_mtu(struct sk_buff *skb, >> mtu = max(mtu, IPV6_MIN_MTU); >> >> if (packet_length > mtu && >> - ovs_tnl_frag_needed(vport, mutable, skb, mtu, >> - OVS_CB(skb)->tun_id)) >> + ovs_tnl_frag_needed(vport, mutable, skb, mtu)) >> return false; >> } >> } >> @@ -1000,15 +995,16 @@ unlock: >> } >> >> static struct rtable *__find_route(const struct tnl_mutable_config *mutable, >> - u8 ipproto, u8 tos) >> + __be32 saddr, __be32 daddr, u8 ipproto, >> + u8 tos) >> { >> /* Tunnel configuration keeps DSCP part of TOS bits, But Linux >> * router expect RT_TOS bits only. */ >> >> #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) >> struct flowi fl = { .nl_u = { .ip4_u = { >> - .daddr = mutable->key.daddr, >> - .saddr = mutable->key.saddr, >> + .daddr = daddr, >> + .saddr = saddr, >> .tos = RT_TOS(tos) } }, >> .proto = ipproto }; >> struct rtable *rt; >> @@ -1018,8 +1014,8 @@ static struct rtable *__find_route(const struct >> tnl_mutable_config *mutable, >> >> return rt; >> #else >> - struct flowi4 fl = { .daddr = mutable->key.daddr, >> - .saddr = mutable->key.saddr, >> + struct flowi4 fl = { .daddr = daddr, >> + .saddr = saddr, >> .flowi4_tos = RT_TOS(tos), >> .flowi4_proto = ipproto }; >> >> @@ -1029,7 +1025,8 @@ static struct rtable *__find_route(const struct >> tnl_mutable_config *mutable, >> >> static struct rtable *find_route(struct vport *vport, >> const struct tnl_mutable_config *mutable, >> - u8 tos, struct tnl_cache **cache) >> + __be32 saddr, __be32 daddr, u8 tos, >> + struct tnl_cache **cache) >> { >> struct tnl_vport *tnl_vport = tnl_vport_priv(vport); >> struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache); >> @@ -1037,18 +1034,21 @@ static struct rtable *find_route(struct vport *vport, >> *cache = NULL; >> tos = RT_TOS(tos); >> >> - if (likely(tos == RT_TOS(mutable->tos) && >> - check_cache_valid(cur_cache, mutable))) { >> + if (daddr == mutable->key.daddr && saddr == mutable->key.saddr && >> + tos == RT_TOS(mutable->tos) && >> + check_cache_valid(cur_cache, mutable)) { >> *cache = cur_cache; >> return cur_cache->rt; >> } else { >> struct rtable *rt; >> >> - rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos); >> + rt = __find_route(mutable, saddr, daddr, >> + tnl_vport->tnl_ops->ipproto, tos); >> if (IS_ERR(rt)) >> return NULL; >> >> - if (likely(tos == RT_TOS(mutable->tos))) >> + if (daddr == mutable->key.daddr && saddr == >> mutable->key.saddr && >> + tos == RT_TOS(mutable->tos)) >> *cache = build_cache(vport, mutable, rt); >> > I am not sure why checks are added on saddr and daddr here? > header caching is not set in case of flow based tunneling to > build_cache call is no-op anyways. > >> return rt; >> @@ -1178,8 +1178,11 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff >> *skb) >> struct rtable *rt; >> struct dst_entry *unattached_dst = NULL; >> struct tnl_cache *cache; >> + struct ovs_key_ipv4_tunnel tun_key; >> int sent_len = 0; >> __be16 frag_off = 0; >> + __be32 daddr; >> + __be32 saddr; >> u8 ttl; >> u8 inner_tos; >> u8 tos; >> @@ -1207,6 +1210,13 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff >> *skb) >> } >> #endif >> >> + /* If OVS_CB(skb)->tun_key is NULL, point it at the local tun_key >> here, >> + * and zero it out. >> + */ >> + if (OVS_CB(skb)->tun_key == NULL) { >> + memset(&tun_key, 0, sizeof(tun_key)); >> + OVS_CB(skb)->tun_key = &tun_key; >> + } >> /* ToS */ >> if (skb->protocol == htons(ETH_P_IP)) >> inner_tos = ip_hdr(skb)->tos; >> @@ -1219,11 +1229,23 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff >> *skb) >> >> if (mutable->flags & TNL_F_TOS_INHERIT) >> tos = inner_tos; >> + else if (OVS_CB(skb)->tun_key->ipv4_dst != 0) >> + tos = OVS_CB(skb)->tun_key->ipv4_tos; >> else >> tos = mutable->tos; >> >> + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) >> + daddr = OVS_CB(skb)->tun_key->ipv4_dst; >> + else >> + daddr = mutable->key.daddr; >> + >> + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) >> + saddr = OVS_CB(skb)->tun_key->ipv4_src; >> + else >> + saddr = mutable->key.saddr; >> + > > It will be easier to read if we have one if check on > OVS_CB(skb)->tun_key->ipv4_dst which is retrieve all tunnel parameter > for flow-based tunning and else block for campat code. > >> /* Route lookup */ >> - rt = find_route(vport, mutable, tos, &cache); >> + rt = find_route(vport, mutable, saddr, daddr, tos, &cache); >> if (unlikely(!rt)) >> goto error_free; >> if (unlikely(!cache)) >> @@ -1260,9 +1282,13 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff >> *skb) >> } >> >> /* TTL */ >> - ttl = mutable->ttl; >> - if (!ttl) >> - ttl = ip4_dst_hoplimit(&rt_dst(rt)); >> + if (OVS_CB(skb)->tun_key->ipv4_dst != 0) >> + ttl = OVS_CB(skb)->tun_key->ipv4_ttl; >> + else { >> + ttl = mutable->ttl; >> + if (!ttl) >> + ttl = ip4_dst_hoplimit(&rt_dst(rt)); >> + } >> >> if (mutable->flags & TNL_F_TTL_INHERIT) { > > This check could be in the compat code block, so it is less confusing. > >> if (skb->protocol == htons(ETH_P_IP)) >> @@ -1442,7 +1468,8 @@ static int tnl_set_config(struct net *net, struct >> nlattr *options, >> struct net_device *dev; >> struct rtable *rt; >> >> - rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos); >> + rt = __find_route(mutable, mutable->key.saddr, >> mutable->key.daddr, >> + tnl_ops->ipproto, mutable->tos); >> if (IS_ERR(rt)) >> return -EADDRNOTAVAIL; >> dev = rt_dst(rt).dev; >> diff --git a/datapath/tunnel.h b/datapath/tunnel.h >> index 1924017..b7858d3 100644 >> --- a/datapath/tunnel.h >> +++ b/datapath/tunnel.h >> @@ -269,14 +269,14 @@ int ovs_tnl_set_addr(struct vport *vport, const >> unsigned char *addr); >> const char *ovs_tnl_get_name(const struct vport *vport); >> const unsigned char *ovs_tnl_get_addr(const struct vport *vport); >> int ovs_tnl_send(struct vport *vport, struct sk_buff *skb); >> -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos); >> +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb); >> >> struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr, >> __be64 key, int tunnel_type, >> const struct tnl_mutable_config **mutable); >> bool ovs_tnl_frag_needed(struct vport *vport, >> const struct tnl_mutable_config *mutable, >> - struct sk_buff *skb, unsigned int mtu, __be64 >> flow_key); >> + struct sk_buff *skb, unsigned int mtu); >> void ovs_tnl_free_linked_skbs(struct sk_buff *skb); >> >> int ovs_tnl_init(void); >> @@ -286,4 +286,15 @@ static inline struct tnl_vport *tnl_vport_priv(const >> struct vport *vport) >> return vport_priv(vport); >> } >> >> +static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, >> + const struct iphdr *iph, __be64 tun_id) >> +{ >> + tun_key->tun_id = tun_id; >> + tun_key->ipv4_src = iph->saddr; >> + tun_key->ipv4_dst = iph->daddr; >> + tun_key->ipv4_tos = iph->tos; >> + tun_key->ipv4_ttl = iph->ttl; >> + tun_key->tun_flags = 0; >> +} >> + >> #endif /* tunnel.h */ >> diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c >> index 05a099d..fbf50d9 100644 >> --- a/datapath/vport-capwap.c >> +++ b/datapath/vport-capwap.c >> @@ -220,7 +220,7 @@ static struct sk_buff *capwap_update_header(const struct >> vport *vport, >> struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1); >> struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key >> *)(wsi + 1); >> >> - opt->key = OVS_CB(skb)->tun_id; >> + opt->key = OVS_CB(skb)->tun_key->tun_id; >> } >> >> udph->len = htons(skb->len - skb_transport_offset(skb)); >> @@ -316,6 +316,7 @@ static int capwap_rcv(struct sock *sk, struct sk_buff >> *skb) >> struct vport *vport; >> const struct tnl_mutable_config *mutable; >> struct iphdr *iph; >> + struct ovs_key_ipv4_tunnel tun_key; >> __be64 key = 0; >> >> if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN))) >> @@ -333,12 +334,11 @@ static int capwap_rcv(struct sock *sk, struct sk_buff >> *skb) >> goto error; >> } >> >> - if (mutable->flags & TNL_F_IN_KEY_MATCH) >> - OVS_CB(skb)->tun_id = key; >> - else >> - OVS_CB(skb)->tun_id = 0; >> + tnl_tun_key_init(&tun_key, iph, >> + mutable->flags & TNL_F_IN_KEY_MATCH ? key : 0); >> + OVS_CB(skb)->tun_key = &tun_key; >> >> - ovs_tnl_rcv(vport, skb, iph->tos); >> + ovs_tnl_rcv(vport, skb); >> goto out; >> >> error: >> diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c >> index ab89c5b..8076b1d 100644 >> --- a/datapath/vport-gre.c >> +++ b/datapath/vport-gre.c >> @@ -103,7 +103,7 @@ static struct sk_buff *gre_update_header(const struct >> vport *vport, >> >> /* Work backwards over the options so the checksum is last. */ >> if (mutable->flags & TNL_F_OUT_KEY_ACTION) >> - *options = be64_get_low32(OVS_CB(skb)->tun_id); >> + *options = be64_get_low32(OVS_CB(skb)->tun_key->tun_id); >> > Here we do need check if we doing flow-based tunneling. in case of > flow-based tunneling mutable is shared between all tunnels, so there > is no-way to we can do port specific flags check from mutable. about > GRE checksum, we need to check for FLOW_TNL_F_CSUM flag for doing > checksum on a packet. > For both of these, to make this check I need to modify structurally the code, because I don't have access to the skbuff with the current code. So I need to modify the tunneling code to access that. Before proceeding down that path, wanted to verify this direction seems ok, because this seems like a larger change. Thanks, Kyle >> if (mutable->out_key || mutable->flags & TNL_F_OUT_KEY_ACTION) >> options--; >> @@ -285,7 +285,7 @@ static void gre_err(struct sk_buff *skb, u32 info) >> #endif >> >> __skb_pull(skb, tunnel_hdr_len); >> - ovs_tnl_frag_needed(vport, mutable, skb, mtu, key); >> + ovs_tnl_frag_needed(vport, mutable, skb, mtu); >> __skb_push(skb, tunnel_hdr_len); >> > same here, tunnel_hdr_len is stored in mutable which is going to be > shared for all tunnels for a type in flow based tunneling, so we need > to calculate it on packet send. > >> out: >> @@ -327,6 +327,7 @@ static int gre_rcv(struct sk_buff *skb) >> const struct tnl_mutable_config *mutable; >> int hdr_len; >> struct iphdr *iph; >> + struct ovs_key_ipv4_tunnel tun_key; >> __be16 flags; >> __be64 key; >> >> @@ -351,15 +352,15 @@ static int gre_rcv(struct sk_buff *skb) >> goto error; >> } >> >> - if (mutable->flags & TNL_F_IN_KEY_MATCH) >> - OVS_CB(skb)->tun_id = key; >> - else >> - OVS_CB(skb)->tun_id = 0; >> + >> + tnl_tun_key_init(&tun_key, iph, >> + mutable->flags & TNL_F_IN_KEY_MATCH ? key : 0); >> + OVS_CB(skb)->tun_key = &tun_key; >> >> __skb_pull(skb, hdr_len); >> skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + >> ETH_HLEN); >> >> - ovs_tnl_rcv(vport, skb, iph->tos); >> + ovs_tnl_rcv(vport, skb); >> return 0; >> >> error: >> diff --git a/datapath/vport.c b/datapath/vport.c >> index dc7adfa..e8279fb 100644 >> --- a/datapath/vport.c >> +++ b/datapath/vport.c >> @@ -462,7 +462,7 @@ void ovs_vport_receive(struct vport *vport, struct >> sk_buff *skb) >> OVS_CB(skb)->flow = NULL; >> >> if (!(vport->ops->flags & VPORT_F_TUN_ID)) >> - OVS_CB(skb)->tun_id = 0; >> + OVS_CB(skb)->tun_key = NULL; >> >> ovs_dp_process_received_packet(vport, skb); >> } >> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h >> index f5c9cca..8334cc1 100644 >> --- a/include/linux/openvswitch.h >> +++ b/include/linux/openvswitch.h >> @@ -278,7 +278,8 @@ enum ovs_key_attr { >> OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ >> OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ >> OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ >> - OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ >> + OVS_KEY_ATTR_IPV4_TUNNEL = 62, /* struct ovs_key_ipv4_tunnel */ >> + OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ >> __OVS_KEY_ATTR_MAX >> }; >> >> @@ -360,6 +361,21 @@ struct ovs_key_nd { >> __u8 nd_tll[6]; >> }; >> >> +/* Values for ovs_key_ipv4_tunnel->tun_flags */ >> +#define FLOW_TNL_F_DONT_FRAGMENT (1 << 0) >> +#define FLOW_TNL_F_CSUM (1 << 1) >> +#define FLOW_TNL_F_KEY (1 << 2) >> + >> +struct ovs_key_ipv4_tunnel { >> + __be64 tun_id; >> + __u32 tun_flags; >> + __be32 ipv4_src; >> + __be32 ipv4_dst; >> + __u8 ipv4_tos; >> + __u8 ipv4_ttl; >> + __u8 pad[2]; >> +}; >> + >> /** >> * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. >> * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >> index c9e3210..797cb06 100644 >> --- a/lib/dpif-netdev.c >> +++ b/lib/dpif-netdev.c >> @@ -1179,6 +1179,7 @@ execute_set_action(struct ofpbuf *packet, const struct >> nlattr *a) >> case OVS_KEY_ATTR_TUN_ID: >> case OVS_KEY_ATTR_PRIORITY: >> case OVS_KEY_ATTR_IPV6: >> + case OVS_KEY_ATTR_IPV4_TUNNEL: >> /* not implemented */ >> break; >> >> diff --git a/lib/odp-util.c b/lib/odp-util.c >> index 257d7a7..9ed17ed 100644 >> --- a/lib/odp-util.c >> +++ b/lib/odp-util.c >> @@ -93,6 +93,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) >> case OVS_KEY_ATTR_UNSPEC: return "unspec"; >> case OVS_KEY_ATTR_ENCAP: return "encap"; >> case OVS_KEY_ATTR_PRIORITY: return "priority"; >> + case OVS_KEY_ATTR_TUN_ID: return "tun_id"; >> + case OVS_KEY_ATTR_IPV4_TUNNEL: return "ipv4_tunnel"; >> case OVS_KEY_ATTR_IN_PORT: return "in_port"; >> case OVS_KEY_ATTR_ETHERNET: return "eth"; >> case OVS_KEY_ATTR_VLAN: return "vlan"; >> @@ -105,7 +107,6 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) >> case OVS_KEY_ATTR_ICMPV6: return "icmpv6"; >> case OVS_KEY_ATTR_ARP: return "arp"; >> case OVS_KEY_ATTR_ND: return "nd"; >> - case OVS_KEY_ATTR_TUN_ID: return "tun_id"; >> >> case __OVS_KEY_ATTR_MAX: >> default: >> @@ -602,6 +603,7 @@ odp_flow_key_attr_len(uint16_t type) >> case OVS_KEY_ATTR_ENCAP: return -2; >> case OVS_KEY_ATTR_PRIORITY: return 4; >> case OVS_KEY_ATTR_TUN_ID: return 8; >> + case OVS_KEY_ATTR_IPV4_TUNNEL: return sizeof(struct >> ovs_key_ipv4_tunnel); >> case OVS_KEY_ATTR_IN_PORT: return 4; >> case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); >> case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16); >> @@ -668,6 +670,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds >> *ds) >> const struct ovs_key_icmpv6 *icmpv6_key; >> const struct ovs_key_arp *arp_key; >> const struct ovs_key_nd *nd_key; >> + const struct ovs_key_ipv4_tunnel *ipv4_tun_key; >> enum ovs_key_attr attr = nl_attr_type(a); >> int expected_len; >> >> @@ -698,6 +701,16 @@ format_odp_key_attr(const struct nlattr *a, struct ds >> *ds) >> ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a))); >> break; >> >> + case OVS_KEY_ATTR_IPV4_TUNNEL: >> + ipv4_tun_key = nl_attr_get(a); >> + ds_put_format(ds, "(tun_id=0x%"PRIx64",flags=0x%"PRIx32 >> + >> ",src="IP_FMT",dst="IP_FMT",tos=0x%"PRIx8",ttl=%"PRIu8")", >> + ntohll(ipv4_tun_key->tun_id), ipv4_tun_key->tun_flags, >> + IP_ARGS(&ipv4_tun_key->ipv4_src), >> + IP_ARGS(&ipv4_tun_key->ipv4_dst), >> + ipv4_tun_key->ipv4_tos, ipv4_tun_key->ipv4_ttl); >> + break; >> + >> case OVS_KEY_ATTR_IN_PORT: >> ds_put_format(ds, "(%"PRIu32")", nl_attr_get_u32(a)); >> break; >> diff --git a/lib/odp-util.h b/lib/odp-util.h >> index 16f2b15..57073ba 100644 >> --- a/lib/odp-util.h >> +++ b/lib/odp-util.h >> @@ -80,6 +80,7 @@ int odp_actions_from_string(const char *, const struct >> simap *port_names, >> * ------ --- ------ ----- >> * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 >> * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 >> + * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28 >> * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 >> * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 >> * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) >> @@ -90,7 +91,7 @@ int odp_actions_from_string(const char *, const struct >> simap *port_names, >> * OVS_KEY_ATTR_ICMPV6 2 2 4 8 >> * OVS_KEY_ATTR_ND 28 -- 4 32 >> * ------------------------------------------------- >> - * total 156 >> + * total 184 >> * >> * We include some slack space in case the calculation isn't quite right or >> we >> * add another field and forget to adjust this value. >> -- >> 1.7.11.4 >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev