Dear all,

I implemented the following patch to accelerate the Open vSwitch datapath. Any comments are highly appreciated.
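To give reviewers the gist before the diff: the datapath uses a reduced L2-only parse only while every installed mask matches nothing beyond the Ethernet addresses, and falls back to the full key extraction otherwise. Below is a minimal user-space sketch of that dispatch decision only; it is not code from the patch, the names toy_mask, toy_table, fastpath_usable and process_packet are made up for illustration, and the patch itself caches the decision when masks change (in fastpath_update()) rather than re-checking it per packet.

/*
 * Illustrative sketch only: simplified stand-ins for the kernel
 * structures, compiled and run entirely in user space.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* A toy "mask": which header fields a flow actually matches on. */
struct toy_mask {
        bool match_eth_addrs;   /* L2 source/destination MAC */
        bool match_other;       /* anything beyond the L2 addresses */
};

/* A toy flow table: just the set of masks currently installed. */
struct toy_table {
        struct toy_mask masks[8];
        int n_masks;
};

/* The fastpath applies only if every installed mask is L2-addresses-only. */
static bool fastpath_usable(const struct toy_table *tbl)
{
        int i;

        for (i = 0; i < tbl->n_masks; i++) {
                const struct toy_mask *m = &tbl->masks[i];

                if (!m->match_eth_addrs || m->match_other)
                        return false;
        }
        return tbl->n_masks > 0;
}

/* Receive path: choose the reduced or the full key extraction. */
static void process_packet(const struct toy_table *tbl, const uint8_t *frame)
{
        if (fastpath_usable(tbl))
                /* Parse only dst/src MAC, i.e. the first 12 bytes. */
                printf("fastpath: dst %02x:..., src %02x:...\n",
                       (unsigned)frame[0], (unsigned)frame[6]);
        else
                /* Fall back to full OpenFlow key extraction (not shown). */
                printf("slowpath: full key extraction\n");
}

int main(void)
{
        uint8_t frame[64] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                              0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        struct toy_table tbl = { .masks = { { .match_eth_addrs = true } },
                                 .n_masks = 1 };

        process_packet(&tbl, frame);            /* takes the fastpath */

        tbl.masks[1].match_other = true;        /* e.g. an L3/L4 match appears */
        tbl.n_masks = 2;
        process_packet(&tbl, frame);            /* default path again */

        return 0;
}

In the patch itself, the corresponding check is the mask-array comparison in fastpath_update(), and the reduced parse lives in fastpath_lookup_l2addrs().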
Thanks,
- Michio

From 944112a356c58f3e41a022a834bcc258ae0eafda Mon Sep 17 00:00:00 2001
From: Michio Honda <michio.ho...@neclab.eu>
Date: Tue, 30 Sep 2014 16:44:11 +0200
Subject: [PATCH] Opportunistic acceleration based on installed flows

This patch implements a framework to accelerate the Open vSwitch
datapath by optimizing flow extraction from packets and/or flow lookup
based on the installed flows.

As a proof of concept, this patch implements a fastpath for the L2
source and destination MAC addresses.  This fastpath is activated when
all the flows in the datapath match on only the L2 source and
destination MAC addresses (all other fields are wildcarded), and it
lets Open vSwitch parse packets for only these fields instead of
parsing all the OpenFlow-related fields.  When flows that match on
other fields are installed, the default flow extraction and lookup
routines are used.

This fastpath improves the forwarding rate between 10 Gbps NICs by 12%
for 64-byte frames.  Note that slow packet I/O (around 1 Mpps) limits
this number; we would see a larger improvement with faster packet I/O.

Signed-off-by: Michio Honda <michio.ho...@neclab.eu>
---
 datapath/actions.c      |   1 +
 datapath/datapath.c     |  30 +++++++--
 datapath/datapath.h     |   1 +
 datapath/flow.c         |  38 +++++++++++
 datapath/flow.h         |  30 +++++++++
 datapath/flow_netlink.c |  55 ----------------
 datapath/flow_table.c   | 168 ++++++++++++++++++++++++++++++++++++++++++++++++
 datapath/flow_table.h   |   8 +++
 8 files changed, 271 insertions(+), 60 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 39a21f4..566ecc3 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -506,6 +506,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
         BUG_ON(!OVS_CB(skb)->pkt_key);

         upcall.cmd = OVS_PACKET_CMD_ACTION;
+        ovs_flow_rebuild_key(skb, OVS_CB(skb)->pkt_key);
         upcall.key = OVS_CB(skb)->pkt_key;
         upcall.userdata = NULL;
         upcall.portid = 0;
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 3a9aebe..835f1a2 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -254,12 +254,17 @@ void ovs_dp_process_packet_with_key(struct sk_buff *skb,
         stats = this_cpu_ptr(dp->stats_percpu);

         /* Look up flow. */
-        flow = ovs_flow_tbl_lookup_stats(&dp->table, pkt_key, skb_get_hash(skb),
-                                         &n_mask_hit);
+        flow = OVS_CB(skb)->flow;
+        if (flow && recirc == false /* XXX do better... */)
+                n_mask_hit = 1; /* XXX pretend mask cache hit */
+        else
+                flow = ovs_flow_tbl_lookup_stats(&dp->table, pkt_key,
+                                                 skb_get_hash(skb), &n_mask_hit);
         if (unlikely(!flow)) {
                 struct dp_upcall_info upcall;

                 upcall.cmd = OVS_PACKET_CMD_MISS;
+                ovs_flow_rebuild_key(skb, pkt_key);
                 upcall.key = pkt_key;
                 upcall.userdata = NULL;
                 upcall.portid = ovs_vport_find_upcall_portid(p, skb);
@@ -289,7 +294,23 @@ void ovs_dp_process_received_packet(struct sk_buff *skb)
 {
         int error;
         struct sw_flow_key key;
+        const struct datapath *dp = OVS_CB(skb)->input_vport->dp;
+        const struct flow_fastpath *fp;
+
+        fp = ovsl_dereference(dp->table.fastpath);
+        if (fp) {
+                struct sw_flow *flow;
+                flow = fp->lookup(skb, OVS_CB(skb)->input_vport->port_no);
+                if (flow) {
+                        OVS_CB(skb)->flow = flow;
+                        /* XXX need to check if we can really use flow->key
+                         * for the forwarding action...
+                         */
+                        ovs_dp_process_packet_with_key(skb, &flow->key, false);
+                        return;
+                }
+        }

         /* Extract flow from 'skb' into 'key'. */
         error = ovs_flow_extract(skb, OVS_CB(skb)->input_vport->port_no, &key);
         if (unlikely(error)) {
@@ -2168,9 +2189,8 @@ static int __init dp_init(void)
         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));

-//        pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
-//                VERSION);
-        pr_info("Open vSwitch switching datapath\n");
+        pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
+                VERSION);

         err = ovs_flow_init();
         if (err)
diff --git a/datapath/datapath.h b/datapath/datapath.h
index d6dee50..20fe5e6 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -107,6 +107,7 @@ struct ovs_skb_cb {
         struct sw_flow_key *pkt_key;
         struct ovs_tunnel_info *tun_info;
         struct vport *input_vport;
+        uint8_t key_masked;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
diff --git a/datapath/flow.c b/datapath/flow.c
index e234796..2afe4c9 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -694,3 +694,41 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)

         return 0;
 }
+
+int ovs_flow_rebuild_key(struct sk_buff *skb, struct sw_flow_key *key)
+{
+        if (!OVS_CB(skb)->key_masked)
+                return 0;
+        if (ovs_flow_extract(skb, OVS_CB(skb)->input_vport->port_no, key))
+                return -1;
+        OVS_CB(skb)->key_masked = 0;
+        return 0;
+}
+
+void update_range__(struct sw_flow_match *match,
+                    size_t offset, size_t size, bool is_mask)
+{
+        struct sw_flow_key_range *range = NULL;
+        size_t start = rounddown(offset, sizeof(long));
+        size_t end = roundup(offset + size, sizeof(long));
+
+        if (!is_mask)
+                range = &match->range;
+        else if (match->mask)
+                range = &match->mask->range;
+
+        if (!range)
+                return;
+
+        if (range->start == range->end) {
+                range->start = start;
+                range->end = end;
+                return;
+        }
+
+        if (range->start > start)
+                range->start = start;
+
+        if (range->end < end)
+                range->end = end;
+}
diff --git a/datapath/flow.h b/datapath/flow.h
index f6afa48..71e025d 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -218,5 +218,35 @@ void ovs_flow_stats_clear(struct sw_flow *);
 u64 ovs_flow_used_time(unsigned long flow_jiffies);

 int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
+int ovs_flow_rebuild_key(struct sk_buff *, struct sw_flow_key *);
+
+void update_range__(struct sw_flow_match *, size_t, size_t, bool);
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+        do { \
+                update_range__(match, offsetof(struct sw_flow_key, field), \
+                               sizeof((match)->key->field), is_mask); \
+                if (is_mask) { \
+                        if ((match)->mask) \
+                                (match)->mask->key.field = value; \
+                } else { \
+                        (match)->key->field = value; \
+                } \
+        } while (0)
+
+#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
+        do { \
+                update_range__(match, offset, len, is_mask); \
+                if (is_mask) { \
+                        if ((match)->mask) \
+                                memcpy((u8 *)&(match)->mask->key + offset, value_p, len);\
+                } else { \
+                        memcpy((u8 *)(match)->key + offset, value_p, len); \
+                } \
+        } while (0)
+
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+        SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
+                                  value_p, len, is_mask)
+
 #endif /* flow.h */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 5f975a1..6f2f645 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -51,61 +51,6 @@

 #include "flow_netlink.h"

-static void update_range__(struct sw_flow_match *match,
-                           size_t offset, size_t size, bool is_mask)
-{
-        struct sw_flow_key_range *range = NULL;
-        size_t start = rounddown(offset, sizeof(long));
-        size_t end = roundup(offset + size, sizeof(long));
-
-        if (!is_mask)
-                range = &match->range;
-        else if (match->mask)
-                range = &match->mask->range;
-
-        if (!range)
-                return;
-
-        if (range->start == range->end) {
-                range->start = start;
-                range->end = end;
-                return;
-        }
-
-        if (range->start > start)
-                range->start = start;
-
-        if (range->end < end)
-                range->end = end;
-}
-
-#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
-        do { \
-                update_range__(match, offsetof(struct sw_flow_key, field), \
-                               sizeof((match)->key->field), is_mask); \
-                if (is_mask) { \
-                        if ((match)->mask) \
-                                (match)->mask->key.field = value; \
-                } else { \
-                        (match)->key->field = value; \
-                } \
-        } while (0)
-
-#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
-        do { \
-                update_range__(match, offset, len, is_mask); \
-                if (is_mask) { \
-                        if ((match)->mask) \
-                                memcpy((u8 *)&(match)->mask->key + offset, value_p, len);\
-                } else { \
-                        memcpy((u8 *)(match)->key + offset, value_p, len); \
-                } \
-        } while (0)
-
-#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
-        SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
-                                  value_p, len, is_mask)
-
 static bool match_validate(const struct sw_flow_match *match,
                            u64 key_attrs, u64 mask_attrs)
 {
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
index 9ab1020..ae94bff 100644
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -18,6 +18,7 @@

 #include "flow.h"
 #include "datapath.h"
+#include "flow_netlink.h"
 #include <linux/uaccess.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -57,6 +58,20 @@ static struct kmem_cache *flow_cache;

 struct kmem_cache *flow_stats_cache __read_mostly;

+static struct sw_flow *fastpath_lookup_l2addrs(struct sk_buff *, u16);
+static void fastpath_init_l2addrs(struct flow_fastpath *);
+static struct flow_fastpath fastpath_array[] =
+{
+        {
+                .lookup = fastpath_lookup_l2addrs,
+                .init = fastpath_init_l2addrs,
+                .data = NULL,
+                .ma = {.count = 0, .max = 0},
+        }
+};
+#define FASTPATH_ARRAY_LEN ARRAY_SIZE(fastpath_array)
+static void fastpath_update(struct flow_table *tbl);
+
 static u16 range_n_bytes(const struct sw_flow_key_range *range)
 {
         return range->end - range->start;
 }
@@ -301,10 +316,31 @@ static void tbl_mask_array_delete_mask(struct mask_array *ma,
         RCU_INIT_POINTER(ma->masks[i], NULL);
 }

+static void flow_fastpath_destroy(struct flow_table *table)
+{
+        int i, j;
+
+        rcu_assign_pointer(table->fastpath, NULL);
+        for (i = 0; i < FASTPATH_ARRAY_LEN; i++) {
+                struct flow_fastpath *fp = &fastpath_array[i];
+                struct mask_array *ma = &fp->ma;
+
+                /* these masks are not ref-counted */
+                for (j = 0; j < ma->count; j++) {
+                        struct sw_flow_mask *mask = ovsl_dereference(ma->masks[j]);
+
+                        tbl_mask_array_delete_mask(ma, mask);
+                        call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
+                }
+                ma->max = 0;
+        }
+}
+
 int ovs_flow_tbl_init(struct flow_table *table)
 {
         struct table_instance *ti;
         struct mask_array *ma;
+        int i;

         table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
                                            MC_HASH_ENTRIES,
                                            __alignof__(struct mask_cache_entry));
@@ -323,6 +359,11 @@ int ovs_flow_tbl_init(struct flow_table *table)
         rcu_assign_pointer(table->mask_array, ma);
         table->last_rehash = jiffies;
         table->count = 0;
+        rcu_assign_pointer(table->fastpath, NULL);
+        for (i = 0; i < FASTPATH_ARRAY_LEN; i++) {
+                struct flow_fastpath *fp = &fastpath_array[i];
+                fp->init(fp);
+        }
         return 0;

 free_mask_array:
@@ -374,6 +415,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
 {
         struct table_instance *ti = (struct table_instance __force *)table->ti;

+        flow_fastpath_destroy(table);
         free_percpu(table->mask_cache);
         kfree((struct mask_array __force *)table->mask_array);
         table_instance_destroy(ti, false);
 }
@@ -747,6 +789,7 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)

                         tbl_mask_array_delete_mask(ma, mask);
                         call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
+                        fastpath_update(tbl);
                 }
         }
 }
@@ -845,6 +888,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
         }

         flow->mask = mask;
+        fastpath_update(tbl);
         return 0;
 }
@@ -880,6 +924,58 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
         return 0;
 }

+/* Return 0 if the two mask arrays contain the same masks, in any
+ * order.  We assume there are no duplicates within each array.
+ */
+static int mask_array_cmp(const struct mask_array *a, const struct mask_array *b)
+{
+        int i, j;
+
+        if (a->count != b->count)
+                return 1;
+
+        for (i = 0; i < a->count; i++) {
+                struct sw_flow_mask *x;
+
+                x = ovsl_dereference(a->masks[i]);
+                for (j = 0; j < b->count; j++) {
+                        struct sw_flow_mask *y;
+
+                        y = ovsl_dereference(b->masks[j]);
+                        if (mask_equal(x, y))
+                                break;
+                }
+                if (j == b->count)
+                        return 1;
+        }
+        return 0;
+}
+
+/*
+ * Search for a fastpath implementation whose mask set matches the
+ * installed masks.  If there is a match, install it; otherwise
+ * uninstall the current one.  This can therefore be used on both
+ * addition and deletion of a mask.
+ */
+static void fastpath_update(struct flow_table *tbl)
+{
+        const struct mask_array *ma;
+        int i;
+
+        ma = ovsl_dereference(tbl->mask_array);
+
+        for (i = 0; i < FASTPATH_ARRAY_LEN; i++) {
+                struct flow_fastpath *fp = &fastpath_array[i];
+
+                if (mask_array_cmp(&fp->ma, ma) == 0) {
+                        rcu_assign_pointer(tbl->fastpath, fp);
+                        break;
+                }
+        }
+        if (i == FASTPATH_ARRAY_LEN && ovsl_dereference(tbl->fastpath) != NULL)
+                rcu_assign_pointer(tbl->fastpath, NULL);
+}
+
 /* Initializes the flow module.
  * Returns zero if successful or a negative error code.
  */
 int ovs_flow_init(void)
@@ -912,3 +1008,75 @@ void ovs_flow_exit(void)
         kmem_cache_destroy(flow_stats_cache);
         kmem_cache_destroy(flow_cache);
 }
+
+static void fastpath_init_l2addrs(struct flow_fastpath *fp)
+{
+        struct mask_array *ma = &fp->ma;
+        struct sw_flow_mask *mask;
+        struct sw_flow_match match;
+        struct sw_flow_key dummy;
+        struct ovs_key_ethernet eth_key;
+        u32 in_port;
+        __be16 tci;
+
+        mask = mask_alloc();
+        if (!mask)
+                return; /* ma->max == 0 indicates uninitialized */
+        ma->count = ma->max = 1;
+
+        ovs_match_init(&match, &dummy, mask);
+        memset(mask, 0, sizeof(*mask));
+
+        in_port = 0xffffffff;
+        SW_FLOW_KEY_PUT(&match, phy.in_port, in_port, true);
+        memset(eth_key.eth_src, 0xff, sizeof(eth_key.eth_src));
+        SW_FLOW_KEY_MEMCPY(&match, eth.src, eth_key.eth_src, ETH_ALEN, true);
+        memset(eth_key.eth_dst, 0xff, sizeof(eth_key.eth_dst));
+        SW_FLOW_KEY_MEMCPY(&match, eth.dst, eth_key.eth_dst, ETH_ALEN, true);
+        tci = htons(0xffff);
+        SW_FLOW_KEY_PUT(&match, eth.tci, tci, true);
+
+        rcu_assign_pointer(ma->masks[0], mask);
+        return;
+}
+
+static struct sw_flow *fastpath_lookup_l2addrs(struct sk_buff *skb, u16 in_port)
+{
+        struct sw_flow *flow;
+        struct sw_flow_key key;
+        const struct vport *p = OVS_CB(skb)->input_vport;
+        struct datapath *dp = p->dp;
+        struct ethhdr *eth;
+        struct table_instance *ti;
+        struct hlist_head *head;
+        struct sw_flow_mask *mask;
+        unsigned short start, end;
+        u32 hash;
+        struct flow_fastpath *fp = rcu_dereference_ovsl(dp->table.fastpath);
+
+        memset(&key, 0, sizeof(key));
+        key.phy.priority = skb->priority;
+        key.phy.in_port = in_port;
+        key.phy.skb_mark = skb->mark;
+
+        skb_reset_mac_header(skb);
+        eth = eth_hdr(skb);
+        ether_addr_copy(key.eth.src, eth->h_source);
+        ether_addr_copy(key.eth.dst, eth->h_dest);
+        /* mark as partial so a complete key can be rebuilt later, e.g., for an upcall */
+        OVS_CB(skb)->key_masked = 1;

+        mask = rcu_dereference_ovsl(fp->ma.masks[0]);
+        start = mask->range.start;
+        end = mask->range.end;
+
+        /* key is already masked */
+        ti = rcu_dereference_ovsl(dp->table.ti);
+        hash = flow_hash(&key, start, end);
+        head = find_bucket(ti, hash);
+        hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
+                if (flow->hash == hash && flow_cmp_masked_key(flow, &key, start, end))
+                        return flow;
+        }
+        return NULL;
+}
diff --git a/datapath/flow_table.h b/datapath/flow_table.h
index a05d36a..ae19c4c 100644
--- a/datapath/flow_table.h
+++ b/datapath/flow_table.h
@@ -56,12 +56,20 @@ struct table_instance {
         bool keep_flows;
 };

+struct flow_fastpath {
+        void (*init)(struct flow_fastpath *);
+        struct sw_flow *(*lookup)(struct sk_buff *skb, u16 in_port);
+        void *data; /* opaque storage for an optimized lookup database */
+        struct mask_array ma;
+};
+
 struct flow_table {
         struct table_instance __rcu *ti;
         struct mask_cache_entry __percpu *mask_cache;
         struct mask_array __rcu *mask_array;
         unsigned long last_rehash;
         unsigned int count;
+        struct flow_fastpath __rcu *fastpath;
 };

 extern struct kmem_cache *flow_stats_cache;
-- 
1.9.3 (Apple Git-50)

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev