Found one typo. Minor comment. there are many eth_type == ETH_P_MPLS_UC || eth_type == ETH_P_MPLS_UC. So how about introducing helper function like bool eth_p_mpls(eth_type).
On Mon, Dec 24, 2012 at 11:35:18AM +0900, Simon Horman wrote: > Allow a flow to be visible in two forms, an outer flow and an inner flow. > This may occur if the actions allow further decoding of a packet to > provide a more fine-grained match. In this case the first-pass > coarse-grained match will hit the outer-flow and the second-pass > fined-grained match will hit the inner-flow. > > Inner-flows are not visible to user-space. Rather, they are just an > internal representation to handle the case of actions allowing further > packet decoding. An inner-flow is associated with its outer-flow and > deleted when the outer-flow is deleted. An outer-flow may have more than > one inner-flow but an inner-flow may only have one outer-flow. > > For example: > > In the case of MPLS, L3 and L4 information may not initially be decoded > from the frame as the ethernet type of the frame is an MPLS type and no > information is known about the type of the inner frame. > > However, the type of the inner frame may be provided by an mpls_pop action > in which case L3 and L4 information may be decoded providing a finer > grained match than is otherwise possible. > > Signed-off-by: Simon Horman <ho...@verge.net.au> > --- > datapath/datapath.c | 197 > +++++++++++++++++++++++++++++++++++++-------------- > datapath/flow.c | 71 ++++++++++++++----- > datapath/flow.h | 19 ++++- > 3 files changed, 212 insertions(+), 75 deletions(-) > > diff --git a/datapath/datapath.c b/datapath/datapath.c > index 9ed7c91..93a380f 100644 > --- a/datapath/datapath.c > +++ b/datapath/datapath.c > @@ -341,7 +341,7 @@ void ovs_dp_detach_port(struct vport *p) > void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) > { > struct datapath *dp = p->dp; > - struct sw_flow *flow; > + struct sw_flow *flow, *outer_flow = NULL; > struct dp_stats_percpu *stats; > u64 *stats_counter; > int error; > @@ -377,6 +377,7 @@ void ovs_dp_process_received_packet(struct vport *p, > struct sk_buff *skb) > kfree_skb(skb); > return; > } > + outer_flow = flow; > flow = ovs_flow_tbl_lookup(table, &key, > key_len); > } > } > @@ -398,6 +399,8 @@ void ovs_dp_process_received_packet(struct vport *p, > struct sk_buff *skb) > } > > stats_counter = &stats->n_hit; > + if (unlikely(outer_flow)) > + ovs_flow_used(outer_flow, skb); > ovs_flow_used(OVS_CB(skb)->flow, skb); > ovs_execute_actions(dp, skb); > > @@ -1115,43 +1118,24 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct > sw_flow *flow, > return skb; > } > > -static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info > *info) > +enum flow_type { > + flow_inner, > + flow_outer, > + flow_singleton, > +}; > + > +static int ovs_flow_cmd_new_or_set__(struct sk_buff *skb, > + struct genl_info *info, > + struct nlattr **a, struct datapath *dp, > + struct sw_flow_key key, int key_len, > + enum flow_type flow_type, > + struct sw_flow **flowp) > { > - struct nlattr **a = info->attrs; > - struct ovs_header *ovs_header = info->userhdr; > - struct sw_flow_key key; > - struct sw_flow *flow; > - struct sk_buff *reply; > - struct datapath *dp; > - struct flow_table *table; > int error; > - int key_len; > - > - /* Extract key. */ > - error = -EINVAL; > - if (!a[OVS_FLOW_ATTR_KEY]) > - goto error; > - error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); > - if (error) > - goto error; > - > - /* Validate actions. */ > - if (a[OVS_FLOW_ATTR_ACTIONS]) { > - error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); > - if (error) > - goto error; > - } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { > - error = -EINVAL; > - goto error; > - } > - > - dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); > - error = -ENODEV; > - if (!dp) > - goto error; > + struct sk_buff *reply = NULL; > + struct flow_table *table = genl_dereference(dp->table); > + struct sw_flow *flow = ovs_flow_tbl_lookup(table, &key, key_len); > > - table = genl_dereference(dp->table); > - flow = ovs_flow_tbl_lookup(table, &key, key_len); > if (!flow) { > struct sw_flow_actions *acts; > > @@ -1179,6 +1163,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff > *skb, struct genl_info *info) > goto error; > } > clear_stats(flow); > + if (flow_type == flow_inner) { > + flow->flags = SW_FLOW_F_NO_DUMP; > + *flowp = flow; > + } > > /* Obtain actions. */ > acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); > @@ -1187,16 +1175,24 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff > *skb, struct genl_info *info) > goto error_free_flow; > rcu_assign_pointer(flow->sf_acts, acts); > > + /* Add inner flow without locking */ > + if (flow_type == flow_outer && *flowp) > + list_add_rcu(&(*flowp)->inner_flows, > + &flow->inner_flows); > + > /* Put flow in bucket. */ > ovs_flow_tbl_insert(table, flow, &key, key_len); > > - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, > - info->snd_seq, > - OVS_FLOW_CMD_NEW); > + if (flow_type == flow_inner) > + reply = ovs_flow_cmd_build_info(flow, dp, > + info->snd_portid, > + info->snd_seq, > + OVS_FLOW_CMD_NEW); > } else { > /* We found a matching flow. */ > struct sw_flow_actions *old_acts; > struct nlattr *acts_attrs; > + bool may_clear_stats = true; > > /* Bail out if we're not allowed to modify an existing flow. > * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL > @@ -1206,8 +1202,16 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff > *skb, struct genl_info *info) > */ > error = -EEXIST; > if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && > - info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) > - goto error; > + info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) { > + /* If this is an outer_flow then it may duplicate > + * the outer_flow of another flow's inner_flow. > + * Just accept this and more on. > + */ > + if (flow_type != flow_outer) > + goto error; > + may_clear_stats = false; > + goto update; > + } > > /* Update actions. */ > old_acts = rcu_dereference_protected(flow->sf_acts, > @@ -1231,14 +1235,23 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff > *skb, struct genl_info *info) > reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, > info->snd_seq, OVS_FLOW_CMD_NEW); > > - /* Clear stats. */ > - if (a[OVS_FLOW_ATTR_CLEAR]) { > +update: > + if (a[OVS_FLOW_ATTR_CLEAR] || flow_type == flow_outer) { > spin_lock_bh(&flow->lock); > - clear_stats(flow); > + /* Clear stats. */ > + if (a[OVS_FLOW_ATTR_CLEAR] && may_clear_stats) > + clear_stats(flow); > + /* Add inner flow */ > + if (flow_type == flow_outer) > + list_add_rcu(&(*flowp)->inner_flows, > + &flow->inner_flows); > spin_unlock_bh(&flow->lock); > } > } > > + if (!reply) > + return 0; > + > if (!IS_ERR(reply)) > genl_notify(reply, genl_info_net(info), info->snd_portid, > ovs_dp_flow_multicast_group.id, info->nlhdr, > @@ -1254,6 +1267,66 @@ error: > return error; > } > > +static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info > *info) > +{ > + struct nlattr **a = info->attrs; > + struct ovs_header *ovs_header = info->userhdr; > + struct sw_flow_key key; > + struct sw_flow *inner_flow = NULL; > + struct datapath *dp; > + int error; > + int key_len[2]; > + > + /* Extract key. */ > + error = -EINVAL; > + if (!a[OVS_FLOW_ATTR_KEY]) > + goto error; > + error = ovs_flow_from_nlattrs(&key, key_len, a[OVS_FLOW_ATTR_KEY]); > + if (error) > + goto error; > + > + /* Validate actions. */ > + if (a[OVS_FLOW_ATTR_ACTIONS]) { > + error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); > + if (error) > + goto error; > + } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { > + error = -EINVAL; > + goto error; > + } > + > + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); > + error = -ENODEV; > + if (!dp) > + goto error; > + > + if (key_len[1]) { > + error = ovs_flow_cmd_new_or_set__(skb, info, a, dp, > + key, key_len[1], > + flow_inner, > + &inner_flow); > + if (error) > + goto error; > + } > + > + error = ovs_flow_cmd_new_or_set__(skb, info, a, dp, > + key, key_len[0], > + key_len[1] ? flow_outer : > + flow_singleton, > + &inner_flow); > + if (error) { > + if (key_len[1]) { > + struct flow_table *table = genl_dereference(dp->table); > + ovs_flow_tbl_remove(table, inner_flow); > + ovs_flow_deferred_free(inner_flow); > + } > + goto error; > + } > + > +error: > + return error; > +} > + > static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) > { > struct nlattr **a = info->attrs; > @@ -1264,11 +1337,11 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, > struct genl_info *info) > struct datapath *dp; > struct flow_table *table; > int err; > - int key_len; > + int key_len[2]; > > if (!a[OVS_FLOW_ATTR_KEY]) > return -EINVAL; > - err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); > + err = ovs_flow_from_nlattrs(&key, key_len, a[OVS_FLOW_ATTR_KEY]); > if (err) > return err; > > @@ -1277,7 +1350,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct > genl_info *info) > return -ENODEV; > > table = genl_dereference(dp->table); > - flow = ovs_flow_tbl_lookup(table, &key, key_len); > + flow = ovs_flow_tbl_lookup(table, &key, > + key_len[1] ? key_len[1] : key_len[0]); > if (!flow) > return -ENOENT; > > @@ -1295,11 +1369,11 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, > struct genl_info *info) > struct ovs_header *ovs_header = info->userhdr; > struct sw_flow_key key; > struct sk_buff *reply; > - struct sw_flow *flow; > + struct sw_flow *notify_flow, *outer_flow, *inner_flow; > struct datapath *dp; > struct flow_table *table; > int err; > - int key_len; > + int key_len[2]; > > dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); > if (!dp) > @@ -1308,26 +1382,39 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, > struct genl_info *info) > if (!a[OVS_FLOW_ATTR_KEY]) > return flush_flows(dp); > > - err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); > + err = ovs_flow_from_nlattrs(&key, key_len, a[OVS_FLOW_ATTR_KEY]); > if (err) > return err; > > table = genl_dereference(dp->table); > - flow = ovs_flow_tbl_lookup(table, &key, key_len); > - if (!flow) > + notify_flow = outer_flow = ovs_flow_tbl_lookup(table, &key, key_len[0]); > + if (!outer_flow) > return -ENOENT; > > - reply = ovs_flow_cmd_alloc_info(flow); > + if (key_len[1]) { > + notify_flow = inner_flow = ovs_flow_tbl_lookup(table, &key, > + key_len[1]); > + if (!inner_flow) > + return -ENOENT; > + } else { > + inner_flow = NULL; > + } > + > + reply = ovs_flow_cmd_alloc_info(notify_flow); > if (!reply) > return -ENOMEM; > > - ovs_flow_tbl_remove(table, flow); > + ovs_flow_tbl_remove(table, outer_flow); > + if (inner_flow) > + ovs_flow_tbl_remove(table, inner_flow); > > - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, > + err = ovs_flow_cmd_fill_info(notify_flow, dp, reply, info->snd_portid, > info->snd_seq, 0, OVS_FLOW_CMD_DEL); > BUG_ON(err < 0); > > - ovs_flow_deferred_free(flow); > + ovs_flow_deferred_free(outer_flow); > + if (inner_flow) > + ovs_flow_deferred_free(inner_flow); > > genl_notify(reply, genl_info_net(info), info->snd_portid, > ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); > diff --git a/datapath/flow.c b/datapath/flow.c > index 7e7dc0a..13c67ca 100644 > --- a/datapath/flow.c > +++ b/datapath/flow.c > @@ -226,7 +226,9 @@ struct sw_flow *ovs_flow_alloc(void) > return ERR_PTR(-ENOMEM); > > spin_lock_init(&flow->lock); > + INIT_LIST_HEAD(&flow->inner_flows); > flow->sf_acts = NULL; > + flow->flags = 0; > > return flow; > } > @@ -343,7 +345,7 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table > *table, u32 *bucket, u32 *la > i = 0; > head = flex_array_get(table->buckets, *bucket); > hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) { > - if (i < *last) { > + if (i < *last || flow->flags | SW_FLOW_F_NO_DUMP) { typo & > i++; > continue; > } > @@ -878,13 +880,25 @@ void ovs_flow_tbl_insert(struct flow_table *table, > struct sw_flow *flow, > __flow_tbl_insert(table, flow); > } > > -void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) > +static void __flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) > { > hlist_del_rcu(&flow->hash_node[table->node_ver]); > table->count--; > BUG_ON(table->count < 0); > } > > +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) > +{ > + struct sw_flow *inner_flow; > + > + __flow_tbl_remove(table, flow); > + > + list_for_each_entry_rcu(inner_flow, &flow->inner_flows, inner_flows) { > + __flow_tbl_remove(table, inner_flow); > + ovs_flow_deferred_free(inner_flow); > + } > +} > + > /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ > const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { > [OVS_KEY_ATTR_ENCAP] = -1, > @@ -1051,7 +1065,7 @@ static int parse_flow_nlattrs(const struct nlattr *attr, > * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute > * sequence. > */ > -int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, > +int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int key_lenp[2], > const struct nlattr *attr) > { > const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; > @@ -1059,6 +1073,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, > int *key_lenp, > int key_len; > u64 attrs; > int err; > + __be16 eth_type; > > memset(swkey, 0, sizeof(struct sw_flow_key)); > key_len = SW_FLOW_KEY_OFFSET(eth); > @@ -1172,7 +1187,30 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, > int *key_lenp, > swkey->eth.type = htons(ETH_P_802_2); > } > > - if (swkey->eth.type == htons(ETH_P_IP)) { > + eth_type = swkey->eth.type; > + if (eth_type == htons(ETH_P_MPLS_UC) || > + eth_type == htons(ETH_P_MPLS_MC)) { > + const struct ovs_key_mpls *mpls_key; > + > + if (!(attrs & (1 << OVS_KEY_ATTR_MPLS))) > + return -EINVAL; > + attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > + > + key_len = SW_FLOW_KEY_OFFSET(mpls.top_label); > + mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > + swkey->mpls.top_label = mpls_key->mpls_top_label; > + > + if (attrs & (1 << OVS_KEY_ATTR_IPV4)) > + eth_type = htons(ETH_P_IP); > + if (attrs & (1 << OVS_KEY_ATTR_IPV6)) > + eth_type = htons(ETH_P_IPV6); > + if (attrs & (1 << OVS_KEY_ATTR_ARP)) > + eth_type = htons(ETH_P_ARP); > + } > + > + key_lenp[0] = key_len; > + > + if (eth_type == htons(ETH_P_IP)) { > const struct ovs_key_ipv4 *ipv4_key; > > if (!(attrs & (1 << OVS_KEY_ATTR_IPV4))) > @@ -1195,7 +1233,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, > int *key_lenp, > if (err) > return err; > } > - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { > + } else if (eth_type == htons(ETH_P_IPV6)) { > const struct ovs_key_ipv6 *ipv6_key; > > if (!(attrs & (1 << OVS_KEY_ATTR_IPV6))) > @@ -1221,8 +1259,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, > int *key_lenp, > if (err) > return err; > } > - } else if (swkey->eth.type == htons(ETH_P_ARP) || > - swkey->eth.type == htons(ETH_P_RARP)) { > + } else if (eth_type == htons(ETH_P_ARP) || > + eth_type == htons(ETH_P_RARP)) { > const struct ovs_key_arp *arp_key; > > if (!(attrs & (1 << OVS_KEY_ATTR_ARP))) > @@ -1238,22 +1276,17 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, > int *key_lenp, > swkey->ip.proto = ntohs(arp_key->arp_op); > memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN); > memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN); > - } else if (swkey->eth.type == htons(ETH_P_MPLS_UC) || > - swkey->eth.type == htons(ETH_P_MPLS_MC)) { > - const struct ovs_key_mpls *mpls_key; > - > - if (!(attrs & (1 << OVS_KEY_ATTR_MPLS))) > - return -EINVAL; > - attrs &= ~(1 << OVS_KEY_ATTR_MPLS); > - > - key_len = SW_FLOW_KEY_OFFSET(mpls.top_label); > - mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > - swkey->mpls.top_label = mpls_key->mpls_top_label; > } > > if (attrs) > return -EINVAL; > - *key_lenp = key_len; > + > + if (eth_type == swkey->eth.type) { > + key_lenp[0] = key_len; > + key_lenp[1] = 0; > + } else { > + key_lenp[1] = key_len; > + } > > return 0; > } > diff --git a/datapath/flow.h b/datapath/flow.h > index c476e82..ebea079 100644 > --- a/datapath/flow.h > +++ b/datapath/flow.h > @@ -98,6 +98,8 @@ struct sw_flow_key { > }; > }; > > +#define SW_FLOW_F_NO_DUMP 0x01 > + > struct sw_flow { > struct rcu_head rcu; > struct hlist_node hash_node[2]; > @@ -107,10 +109,25 @@ struct sw_flow { > struct sw_flow_actions __rcu *sf_acts; > > spinlock_t lock; /* Lock for values below. */ > + > + struct list_head inner_flows; /* Inner flows. > + * For an inner flow this is its > + * entry in the hlist of its outer > + * flow. > + * For an outer flow this is the > + * list of inner flows > + * For a singleton flow this is > + * unused > + */ > + > unsigned long used; /* Last used time (in jiffies). */ > u64 packet_count; /* Number of packets matched. */ > u64 byte_count; /* Number of bytes matched. */ > u8 tcp_flags; /* Union of seen TCP flags. */ > + /* End values that need loc*/ > + > + u8 flags; /* SW_FLOW_F_* */ > + > }; > > struct arp_eth_header { > @@ -142,7 +159,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const > struct nlattr *); > void ovs_flow_deferred_free_acts(struct sw_flow_actions *); > > int ovs_flow_extract_l3_onwards(struct sk_buff *, struct sw_flow_key *, > - int *key_lenp, __be16 eth_type); > + int key_lenp[2], __be16 eth_type); > int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, > int *key_lenp); > void ovs_flow_used(struct sw_flow *, struct sk_buff *); > -- > 1.7.10.4 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > -- yamahata _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev