On Fri, May 16, 2014 at 5:33 PM, Andy Zhou <[email protected]> wrote:
> When deleting a mask from the mask array, we always move the last entry
> into the deleted entry's location. Another approach would be to NULL out
> the deleted entry's slot and periodically compact the array.
>
> The approach taken by this patch is more efficient at run time: during
> lookup, fast path packets don't have to skip over NULL pointers.
>
> A more important advantage of this approach is that it tries to
> keep mask array indexes stable by avoiding periodic index reshuffling.
>
> This patch also implements an optimization to further promote index
> stability. By leaving the last entry's value intact when moving it to
> a new location, stale cache indexes can 'fix' themselves by noticing
> that the index in the cache is outside the valid mask array region; the
> new index can then be found by scanning for the mask pointer within the
> valid region.
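Just to check that I am reading the recovery path correctly, the idea is
roughly the following (untested sketch, reusing the names from this patch --
ma, ce and tbl_mask_array_find_idx()):

    /* A cached index beyond ma->count points into the invalid region,
     * but the slot there still holds the moved mask's pointer, so it
     * can be used to re-locate the mask's new index. */
    int i = ce->mask_index;
    struct sw_flow_mask *cache = NULL;

    if (i < ma->count) {
            /* Index still valid, use it directly. */
            cache = rcu_dereference_ovsl(ma->masks[i]);
    } else if (i < ma->max) {
            /* Stale index: take the left-behind pointer and rescan
             * the valid region for its new position. */
            cache = rcu_dereference_ovsl(ma->masks[i]);
            i = tbl_mask_array_find_idx(ma, cache);
            if (i < 0)
                    cache = NULL;   /* Mask is no longer in the array. */
    }

Is that the intent?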
>
> Signed-off-by: Andy Zhou <[email protected]>
>
> ----
> v1 -> v2:
> * added shrink mask array function.
> * documented the corner case of mask deletion.
> * Simplified mask flow lookup function
>   (w.r.t. using and updating the mask cache)
> ---
> datapath/flow_table.c | 162 ++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 112 insertions(+), 50 deletions(-)
>
> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index 58a25c7..564e51d 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -247,10 +247,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
> if (old) {
> int i;
>
> - for (i = 0; i < old->max; i++) {
> - if (old->masks[i])
> - new->masks[new->count++] = old->masks[i];
> - }
> + for (i = 0; i < old->max; i++)
> + new->masks[i] = old->masks[i];
> +
> + new->count = old->count;
> }
> rcu_assign_pointer(tbl->mask_array, new);
>
> @@ -260,6 +260,65 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
> return 0;
> }
>
> +static void tbl_mask_array_delete_mask(struct mask_array *ma,
> + const struct sw_flow_mask *mask)
> +{
> + int i = 0;
> +
> + /* Delete a mask pointer from the valid section.
> + *
> + * Also move the last entry into its place, so there is no
> + * hole in the valid section.
> + *
> + * Notice the last entry still points to the original mask.
> + *
> + * <Note>: there is a small race window that may cause a mask
> + * to be missed in a search. Imagine a core walking through the
> + * array that has already passed the index of the mask being
> + * deleted. Before it reaches the last entry, that entry is
> + * overwritten by another core adding a new mask, so the last
> + * entry now points to the new mask. In this case, the moved-up
> + * last entry can be missed by the core walking the mask array.
> + *
> + * If the missed mask would have led to a successful lookup,
> + * hitting the race window can cause a packet to miss the kernel
> + * flow cache and be sent to user space.
> + * </Note>
> + */
> + while (i < ma->count - 2) {
> + if (mask == ma->masks[i]) {
> + struct sw_flow_mask *last;
> +
> + last = ma->masks[ma->count - 1];
> + rcu_assign_pointer(ma->masks[i], last);
> + ma->count--;
> + break;
> + } else
> + i++;
> + }
> +
> + /* Remove the deleted mask pointers from the invalid section. */
> + for (; i < ma->max; i++) {
> + if (mask == ma->masks[i])
> + RCU_INIT_POINTER(ma->masks[i], NULL);
> +
> + if (i == ma->count - 1)
> + ma->count--;
This condition can be true at most once; otherwise it is a bug.
I think the first loop can have the range [0, count), and the second loop
can then reset the duplicate pointers. I don't think there is a need to
single out the last element of the array.
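Something along these lines, i.e. an untested sketch of the shape I have
in mind (reusing the helpers already used in this patch):

    static void tbl_mask_array_delete_mask(struct mask_array *ma,
                                           const struct sw_flow_mask *mask)
    {
            int i;

            /* Scan the whole valid region [0, count) and move the last
             * valid entry into the deleted slot.  As in your patch, the
             * moved entry's old slot keeps its value so stale cache
             * indexes can still recover. */
            for (i = 0; i < ma->count; i++) {
                    if (mask == ovsl_dereference(ma->masks[i])) {
                            struct sw_flow_mask *last;

                            last = ovsl_dereference(ma->masks[ma->count - 1]);
                            rcu_assign_pointer(ma->masks[i], last);
                            ma->count--;
                            break;
                    }
            }

            /* Clear any leftover copies of the deleted mask from the
             * now-invalid region. */
            for (i = ma->count; i < ma->max; i++)
                    if (mask == ovsl_dereference(ma->masks[i]))
                            RCU_INIT_POINTER(ma->masks[i], NULL);
    }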
> + }
> +}
> +
> +static int tbl_mask_array_find_idx(struct mask_array *ma,
> + const struct sw_flow_mask *mask)
> +{
> + int i;
> +
> + for (i = 0; i < ma->count; i++)
> + if (mask == ovsl_dereference(ma->masks[i]))
> + return i;
> +
> + return -1;
> +}
> +
> int ovs_flow_tbl_init(struct flow_table *table)
> {
> struct table_instance *ti;
> @@ -524,7 +583,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
> struct sw_flow *flow;
> int i;
>
> - for (i = 0; i < ma->max; i++) {
> + for (i = 0; i < ma->count; i++) {
> struct sw_flow_mask *mask;
>
> mask = rcu_dereference_ovsl(ma->masks[i]);
> @@ -554,8 +613,9 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
> {
> struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
> struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> - struct mask_cache_entry *entries, *ce, *del;
> + struct mask_cache_entry *entries, *ce;
> struct sw_flow *flow;
> + struct sw_flow_mask *cache;
> u32 hash = skb_hash;
> int seg;
>
> @@ -566,42 +626,53 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
> return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
> }
>
> - del = NULL;
> + ce = NULL;
> + cache = NULL;
> entries = this_cpu_ptr(tbl->mask_cache);
>
> + /* Find the cache entry 'ce' to operate on. */
> for (seg = 0; seg < MC_HASH_SEGS; seg++) {
> - int index;
> -
> - index = hash & (MC_HASH_ENTRIES - 1);
> - ce = &entries[index];
> -
> - if (ce->skb_hash == skb_hash) {
> - struct sw_flow_mask *mask;
> + struct mask_cache_entry *e;
> + int index = hash & (MC_HASH_ENTRIES - 1);
> +
> + e = &entries[index];
> + if (e->skb_hash == skb_hash) {
> + int i = e->mask_index;
> +
> + if (i < ma->count)
> + cache = rcu_dereference_ovsl(ma->masks[i]);
> + else if (i < ma->max) {
> + cache = rcu_dereference_ovsl(ma->masks[i]);
> + i = tbl_mask_array_find_idx(ma, cache);
> + if (i < 0)
> + cache = NULL;
> + }
>
> - mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]);
> - if (mask) {
> - flow = masked_flow_lookup(ti, key, mask,
> - n_mask_hit);
> - if (flow) /* Found */
> - return flow;
> + if (!cache)
> + e->skb_hash = 0; /* Not a valid cache entry. */
>
> - }
> - del = ce;
> + ce = e; /* The best cache candidate. */
> break;
> }
>
> - if (!del || (del->skb_hash && !ce->skb_hash) ||
> - (rcu_dereference_ovsl(ma->masks[del->mask_index]) &&
> - !rcu_dereference_ovsl(ma->masks[ce->mask_index]))) {
> - del = ce;
> - }
> + if (!ce || e->skb_hash > ce->skb_hash)
> + ce = e; /* A better replacement cache candidate. */
>
> hash >>= MC_HASH_SHIFT;
> }
>
> - flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &del->mask_index);
> + /* Try the cached mask first if a cache entry was found. */
> + if (cache) {
> + flow = masked_flow_lookup(ti, key, cache, n_mask_hit);
> + if (flow)
> + /* Cache hit. */
> + return flow;
> + }
> +
> + /* Cache miss, do full lookup. */
> + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
> if (flow)
> - del->skb_hash = skb_hash;
> + ce->skb_hash = skb_hash;
>
> return flow;
> }
Can you post a separate patch to simplify the lookup function?
> @@ -644,18 +715,17 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>
> if (!mask->ref_count) {
> struct mask_array *ma;
> - int i;
>
> ma = ovsl_dereference(tbl->mask_array);
> - for (i = 0; i < ma->max; i++) {
> - if (mask == ovsl_dereference(ma->masks[i])) {
> - RCU_INIT_POINTER(ma->masks[i], NULL);
> - ma->count--;
> - goto free;
> - }
> + /* Shrink the mask array if necessary. */
> + if (ma->max > MASK_ARRAY_SIZE_MIN * 2
> + && ma->count <= ma->max / 4) {
> +
> + tbl_mask_array_realloc(tbl, ma->max / 2);
> + ma = ovsl_dereference(tbl->mask_array);
> }
We can shrink it according to ma->count, rather than ma->max.
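E.g. something like this (untested; assumes roundup_pow_of_two() is fine to
use here and that ma->count is at least 1 at this point):

    /* Size the new array from the number of masks actually in use,
     * rather than just halving the current allocation. */
    if (ma->max > MASK_ARRAY_SIZE_MIN * 2 && ma->count <= ma->max / 4) {
            int size = max_t(int, MASK_ARRAY_SIZE_MIN,
                             roundup_pow_of_two(ma->count));

            tbl_mask_array_realloc(tbl, size);
            ma = ovsl_dereference(tbl->mask_array);
    }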
> - BUG();
> -free:
> +
> + tbl_mask_array_delete_mask(ma, mask);
> call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
> }
> }
> @@ -704,7 +774,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
> int i;
>
> ma = ovsl_dereference(tbl->mask_array);
> - for (i = 0; i < ma->max; i++) {
> + for (i = 0; i < ma->count; i++) {
> struct sw_flow_mask *t;
>
> t = ovsl_dereference(ma->masks[i]);
> @@ -724,7 +794,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
> mask = flow_mask_find(tbl, new);
> if (!mask) {
> struct mask_array *ma;
> - int i;
>
> /* Allocate a new mask if none exsits. */
> mask = mask_alloc();
> @@ -747,16 +816,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
> }
> ma = ovsl_dereference(tbl->mask_array);
> }
> - for (i = 0; i < ma->max; i++) {
> - const struct sw_flow_mask *t;
> -
> - t = ovsl_dereference(ma->masks[i]);
> - if (!t) {
> - rcu_assign_pointer(ma->masks[i], mask);
> - ma->count++;
> - break;
> - }
> - }
> +
> + rcu_assign_pointer(ma->masks[ma->count], mask);
> + ma->count++;
> } else {
> BUG_ON(!mask->ref_count);
> mask->ref_count++;
> --
> 1.9.1
>
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev