From 07e6a8fa5aa7c294df72890d98c8cfa72aa1d613 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 18 Jun 2013 14:43:02 -0700
Subject: [PATCH] Cleanups

---
 datapath/README     |  43 +++++++++++-------
 datapath/datapath.c |   4 +-
 datapath/flow.c     | 123 ++++++++++++++++++++++++----------------------------
 datapath/flow.h     |   7 +--
 4 files changed, 89 insertions(+), 88 deletions(-)

diff --git a/datapath/README b/datapath/README
index 5347abb..37c20ee 100644
--- a/datapath/README
+++ b/datapath/README
@@ -94,29 +94,42 @@ Often we ellipsize arguments not important to the discussion, e.g.:
 Wildcarded flow key format
 --------------------------
 
-A Wildcarded flow are described with two sequences Netlink attributes
-passed over the Netlink socket. A flow key, exactly as described above and an
+A wildcarded flow is described with two sequences of Netlink attributes
+passed over the Netlink socket. A flow key, exactly as described above, and an
 optional corresponding flow mask.
 
 A wildcarded flow can represent a group of exact match flows. Each '1' bit
 in the mask specifies a exact match with the corresponding bit in the flow key.
-While '0' bit specifies a don't care bit, which will match either a
-'1' or '0' bit of a incoming packet. Using wildcarded flow can improve the
-flow set up rate, by reduce the number of new flows need to be processed by
-the user space program.
-
-Notice the support of the mask Netlink attribute are optional for both
-the kernel and user space program. The kernel can ignore
-the mask attribute, only install exact match flow, or reduce the number
-of don't care bits in kernel than what was specified by the user space program.
+A '0' bit specifies a don't care bit, which will match either a '1' or '0' bit
+of an incoming packet. Using a wildcarded flow can improve the flow set up
+rate by reducing the number of new flows that need to be processed by the user space program.
+
+Support for the mask Netlink attribute is optional for both the kernel and user
+space program. The kernel can ignore the mask attribute, installing an exact
+match flow, or reduce the number of don't care bits in the kernel to less than
+what was specified by the user space program. In this case, variations in bits
+that the kernel does not implement will simply result in additional flow setups.
 The kernel module will also work with user space programs that neither support
 nor supply flow mask attributes.
 
-The behavior of over lapping wildcarded flow is undefined. It is the
-responsibility of the user space programs to ensure that any incoming packet
-can match at most one flow, wildcarded or not. 
+Since the kernel may ignore or modify wildcard bits, it can be difficult for
+the userspace program to know exactly what matches are installed. There are
+two possible approaches: reactively install flows as they miss the kernel
+flow table (and therefore not attempting to determine wildcard changes at all)
+or use the kernel's response messages to determine the installed wildcards.
+
+When interacting with userspace, the kernel should maintain the match portion
+of the key exactly as originally installed. This will provide a handle to
+identify the flow for all future operations. However, when reporting the
+mask of an installed flow, the mask should include any restrictions imposed
+by the kernel.
+
+The behavior when using overlapping wildcarded flows is undefined. It is the
+responsibility of the user space program to ensure that any incoming packet
+can match at most one flow, wildcarded or not. The current implementation
+performs best-effort detection of overlapping wildcarded flows and may reject
+some but not all of them. However, this behavior may change in future versions.
 
-Current implementation neither detects nor rejects any overlapping wildcarded flows. However, this behavior can change in future versions. 
 
 Basic rule for evolving flow keys
 ---------------------------------
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 6bf8286..0a0fe43 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -1324,7 +1324,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 			/* Allocate a new mask if none exsits. */
 			mask_p = ovs_sw_flow_mask_alloc();
 			if (!mask_p)
-				goto err_unlock_ovs;
+				goto err_flow_free;
 			mask_p->key = mask.key;
 			mask_p->range = mask.range;
 			ovs_sw_flow_mask_insert(mask_p);
@@ -1384,6 +1384,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 				ovs_dp_flow_multicast_group.id,	PTR_ERR(reply));
 	return 0;
 
+err_flow_free:
+	ovs_flow_free(flow);
 err_unlock_ovs:
 	ovs_unlock();
 err_kfree:
diff --git a/datapath/flow.c b/datapath/flow.c
index 2b7d9ed..d79190f 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -50,7 +50,6 @@ static LIST_HEAD(mask_list);  /* List access is proteced by ovs_mutex. */
 
 static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
 		struct sw_flow_key_range *range, u8 val);
-static void ovs_sw_flow_mask_list_destroy(void);
 
 static void update_range__(struct sw_flow_match *match,
 			  size_t offset, size_t size, bool is_mask)
@@ -121,7 +120,7 @@ void ovs_match_init(struct sw_flow_match *match,
 static bool ovs_match_validate(const struct sw_flow_match *match,
 		u64 key_attrs, u64 mask_attrs)
 {
-	u64 key_expected = 0;
+	u64 key_expected = OVS_KEY_ATTR_ETHERNET;
 	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
 
 	/* The following mask attributes allowed only if they
@@ -135,7 +134,8 @@ static bool ovs_match_validate(const struct sw_flow_match *match,
 			| (1ULL << OVS_KEY_ATTR_ARP)
 			| (1ULL << OVS_KEY_ATTR_ND));
 
-	if (match->key->eth.type == htons(ETH_P_802_2))
+	if (match->key->eth.type == htons(ETH_P_802_2) &&
+	    match->mask && (match->mask->key.eth.type == htons(0xffff)))
 		mask_allowed |= (1ULL << OVS_KEY_ATTR_ETHERTYPE);
 
 	/* Check key attributes. */
@@ -196,13 +196,8 @@ static bool ovs_match_validate(const struct sw_flow_match *match,
 					mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;
 
 				if (match->key->ipv6.tp.src ==
-						htons(NDISC_NEIGHBOUR_SOLICITATION)) {
-					key_expected |= 1ULL << OVS_KEY_ATTR_ND;
-					if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
-						mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
-				}
-
-				if (match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+				    match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
 					key_expected |= 1ULL << OVS_KEY_ATTR_ND;
 					if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
 						mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
@@ -510,9 +505,6 @@ void ovs_flow_tbl_deferred_destroy(struct flow_table *table)
 	if (!table)
 		return;
 
-	if (!table->keep_flows)
-		ovs_sw_flow_mask_list_destroy();
-
 	call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
 }
 
@@ -596,13 +588,19 @@ struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
 	return __flow_tbl_rehash(table, table->n_buckets * 2);
 }
 
+static void __flow_free(struct sw_flow *flow)
+{
+	kfree((struct sf_flow_acts __force *)flow->sf_acts);
+	kmem_cache_free(flow_cache, flow);
+}
+
 void ovs_flow_free(struct sw_flow *flow)
 {
 	if (unlikely(!flow))
 		return;
 
-	kfree((struct sf_flow_acts __force *)flow->sf_acts);
-	kmem_cache_free(flow_cache, flow);
+	ovs_sw_flow_mask_del_ref(ovsl_dereference(flow->mask));
+	__flow_free(flow);
 }
 
 /* RCU callback used by ovs_flow_deferred_free. */
@@ -610,13 +608,16 @@ static void rcu_free_flow_callback(struct rcu_head *rcu)
 {
 	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
 
-	ovs_flow_free(flow);
+	__flow_free(flow);
 }
 
 /* Schedules 'flow' to be freed after the next RCU grace period.
  * The caller must hold rcu_read_lock for this to be sensible. */
 void ovs_flow_deferred_free(struct sw_flow *flow)
 {
+	if (unlikely(!flow))
+		return;
+
 	ovs_sw_flow_mask_del_ref(ovsl_dereference(flow->mask));
 	call_rcu(&flow->rcu, rcu_free_flow_callback);
 }
@@ -700,7 +701,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 			int nh_len)
 {
 	struct icmp6hdr *icmp = icmp6_hdr(skb);
-	int error = 0;
 
 	/* The ICMPv6 type and code fields use the 16-bit transport port
 	 * fields, so we need to store them in 16-bit network byte order.
@@ -719,11 +719,10 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 		 * entire packet.
 		 */
 		if (unlikely(icmp_len < sizeof(*nd)))
-			return error;
-		if (unlikely(skb_linearize(skb))) {
-			error = -ENOMEM;
-			return error;
-		}
+			return 0;
+
+		if (unlikely(skb_linearize(skb)))
+			return -ENOMEM;
 
 		nd = (struct nd_msg *)skb_transport_header(skb);
 		key->ipv6.nd.target = nd->target;
@@ -736,7 +735,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 			int opt_len = nd_opt->nd_opt_len * 8;
 
 			if (unlikely(!opt_len || opt_len > icmp_len))
-				return error;
+				return 0;
 
 			/* Store the link layer address if the appropriate
 			 * option is provided.  It is considered an error if
@@ -761,14 +760,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 		}
 	}
 
-	return error;
+	return 0;
 
 invalid:
 	memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
 	memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
 	memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
 
-	return error;
+	return 0;
 }
 
 /**
@@ -797,7 +796,7 @@ invalid:
  */
 int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 {
-	int error = 0;
+	int error;
 	struct ethhdr *eth;
 
 	memset(key, 0, sizeof(*key));
@@ -859,7 +858,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 		offset = nh->frag_off & htons(IP_OFFSET);
 		if (offset) {
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
-			return error;
+			return 0;
 		}
 		if (nh->frag_off & htons(IP_MF) ||
 			 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
@@ -913,15 +912,17 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 
 		nh_len = parse_ipv6hdr(skb, key);
 		if (unlikely(nh_len < 0)) {
-			if (nh_len == -EINVAL)
+			if (nh_len == -EINVAL) {
 				skb->transport_header = skb->network_header;
-			else
+				error = 0;
+			} else {
 				error = nh_len;
+			}
 			return error;
 		}
 
 		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
-			return error;
+			return 0;
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
 
@@ -941,11 +942,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 		} else if (key->ip.proto == NEXTHDR_ICMP) {
 			if (icmp6hdr_ok(skb)) {
 				error = parse_icmpv6(skb, key, nh_len);
+				if (error)
+					return error;
 			}
 		}
 	}
 
-	return error;
+	return 0;
 }
 
 static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len)
@@ -1325,6 +1328,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
 
 		SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
 		attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
+	} else if (!is_mask) {
+		SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
 	}
 
 	if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
@@ -1521,9 +1526,17 @@ int ovs_match_from_nlattrs(struct sw_flow_match *match,
 			return err;
 
 		if ((mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) && encap_valid) {
-			mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
-			encap = m[OVS_KEY_ATTR_ENCAP];
-			err = parse_flow_mask_nlattrs(encap, m, &mask_attrs);
+			__be16 eth_type = 0;
+
+			if (m[OVS_KEY_ATTR_ETHERTYPE])
+				eth_type = nla_get_be16(m[OVS_KEY_ATTR_ETHERTYPE]);
+			if (eth_type == htons(0xffff)) {
+				mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
+				encap = m[OVS_KEY_ATTR_ENCAP];
+				err = parse_flow_mask_nlattrs(encap, m, &mask_attrs);
+			} else
+				err = -EINVAL;
+
 			if (err)
 				return err;
 		}
@@ -1532,21 +1545,12 @@ int ovs_match_from_nlattrs(struct sw_flow_match *match,
 		if (err)
 			return err;
 	} else {
-		/* Populate Micro flow's key mask. */
+		/* Populate exact match flow's key mask. */
 		if (match->mask)
 			ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
 	}
 
-	if (!(key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) {
-		if (match->mask)
-			if (match->mask->key.eth.type != htons(0xffff)
-				&& (match->mask->key.eth.type != htons(0)))
-				return -EINVAL;
-
-		match->key->eth.type = htons(ETH_P_802_2);
-	}
-
-	if (ovs_match_validate(match, key_attrs, mask_attrs) == false)
+	if (!ovs_match_validate(match, key_attrs, mask_attrs))
 		return -EINVAL;
 
 	return 0;
@@ -1619,21 +1623,19 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
 	    nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
 		goto nla_put_failure;
 
-	if (!is_all_zero((u8 *)&output->eth,  2 * ETH_ALEN)) {
-		nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
-		if (!nla)
-			goto nla_put_failure;
+	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+	if (!nla)
+		goto nla_put_failure;
 
-		eth_key = nla_data(nla);
-		memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
-		memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
-	}
+	eth_key = nla_data(nla);
+	memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
+	memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
 
 	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
 		__be16 eth_type;
 		eth_type = (swkey == output) ? htons(ETH_P_8021Q) : htons(0xffff) ;
 		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
-				nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
+		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
 			goto nla_put_failure;
 		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
 		if (!swkey->eth.tci)
@@ -1868,19 +1870,6 @@ void ovs_sw_flow_mask_insert(struct sw_flow_mask *mask)
 }
 
 /**
- * Delete the entire mask list, free all masks on the list
- */
-static void ovs_sw_flow_mask_list_destroy(void)
-{
-	struct list_head *ml, *tmp;
-
-	list_for_each_safe(ml, tmp, &mask_list) {
-		struct sw_flow_mask *m;
-		m = container_of(ml, struct sw_flow_mask, list);
-		ovs_sw_flow_mask_deferred_destroy(m);
-	}
-}
-/**
  * Set 'range' fields in the mask to the value of 'val'.
  */
 static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
diff --git a/datapath/flow.h b/datapath/flow.h
index 1bddb96..be0bc2f 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -138,7 +138,7 @@ struct sw_flow_key_range {
 	size_t end;
 };
 
-static inline u16 sw_flow_key_range_actual_size(const struct sw_flow_key_range *range)
+static inline u16 ovs_sw_flow_key_range_actual_size(const struct sw_flow_key_range *range)
 {
 	return range->end - range->start;
 }
@@ -246,7 +246,7 @@ struct sw_flow_mask {
 static inline u16
 ovs_sw_flow_mask_actual_size(const struct sw_flow_mask *mask)
 {
-	return sw_flow_key_range_actual_size(&mask->range);
+	return ovs_sw_flow_key_range_actual_size(&mask->range);
 }
 
 static inline u16
@@ -260,7 +260,4 @@ void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *);
 void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *);
 void ovs_sw_flow_mask_insert(struct sw_flow_mask *);
 struct sw_flow_mask *ovs_sw_flow_mask_find(const struct sw_flow_mask *);
-int ovs_mega_flow_from_nlattrs(struct sw_flow_key *key,
-			       struct sw_flow_mask *mask,
-			       const struct nlattr *);
 #endif /* flow.h */
-- 
1.8.1.2

