On Wed, Mar 29, 2023 at 9:05 AM Ilya Maximets <[email protected]> wrote:
>
> While crushing OR expressions, OVN removes exact replicas of sub
> expressions. However, there could be many CMP expressions that are
> supersets of each other. These are most likely to be created as a
> result of cross-product while expanding brackets in the AND expression
> in crush_and_numeric(), i.e. while converting
> "x && (a0 || a1) && (b0 || b1)" into "xa0b0 || xa0b1 || xa1b0 || xa1b1".
>
> In addition to removal of exact duplicates introducing scan and removal
> of supersets of other existing sub-expressions to reduce the amount of
> generated flows. This adds extra computations, but should save time
> later, since less flows will be generated.
>
> Example:
>
> "ip4.src == 172.168.0.0/16 && ip4.src!={172.168.13.0/24, 172.168.15.0/24
}"
>
> Processing of this expression yields 42 flows:
>
> $ ./tests/ovstest test-ovn expr-to-flows <<< "$expr"
>
> ip,nw_src=172.168.0.0/255.255.1.0
> ip,nw_src=172.168.0.0/255.255.10.0
> ip,nw_src=172.168.0.0/255.255.12.0
> ip,nw_src=172.168.0.0/255.255.3.0
> ip,nw_src=172.168.0.0/255.255.4.0
> ip,nw_src=172.168.0.0/255.255.5.0
> ip,nw_src=172.168.0.0/255.255.6.0
> ip,nw_src=172.168.0.0/255.255.8.0
> ip,nw_src=172.168.0.0/255.255.9.0
> ip,nw_src=172.168.128.0/17
> <... 32 more flows ...>
>
> We can see that many flows above do overlap, e.g. 255.255.3.0
> mask is a superset of 255.255.1.0. Everything that matches
> 255.255.3.0, will match 255.255.1.0 as well (the value is the same).
>
> By removing all the unnecessary supersets, the set of flows can
> be reduced from 42 down to 7:
>
> ip,nw_src=172.168.0.0/255.255.1.0
> ip,nw_src=172.168.0.0/255.255.4.0
> ip,nw_src=172.168.0.0/255.255.8.0
> ip,nw_src=172.168.128.0/17
> ip,nw_src=172.168.16.0/255.255.16.0
> ip,nw_src=172.168.32.0/255.255.32.0
> ip,nw_src=172.168.64.0/255.255.64.0
>
> This change should be particularly useful for expressions with
> inequality checks, like the one above. Such expressions are
> frequent among ACL rules.
>
> "ip4.src != {172.168.13.0/24, 172.168.14.0/24, 172.168.15.0/24}"
>
> Brefore:
> $ ./tests/ovstest test-ovn expr-to-flows <<< "$expr" | wc -l
> 2894
>
> After:
> $ ./tests/ovstest test-ovn expr-to-flows <<< "$expr" | wc -l
> 23
>
> Superset lookups are performed only if there are expressions with
> more bits in the mask than in the current one. So, there is no extra
> cost for equality checks on normal address sets, like port group sets,
> where all the IPs are exect matches or have the same prefix length
> otherwise.
>
> Use of bitmaps instead of subvalue functions significantly speeds up
> processing since most of the subvalue space is an all-zero empty space.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2177197
> Reported-by: Nadia Pinaeva <[email protected]>
> Signed-off-by: Ilya Maximets <[email protected]>
> ---
> include/ovn/expr.h | 1 +
> lib/expr.c | 238 ++++++++++++++++++++++++++++++++++-----------
> tests/ovn.at | 12 +++
> 3 files changed, 196 insertions(+), 55 deletions(-)
>
> diff --git a/include/ovn/expr.h b/include/ovn/expr.h
> index 80d95ff67..c48f82398 100644
> --- a/include/ovn/expr.h
> +++ b/include/ovn/expr.h
> @@ -384,6 +384,7 @@ struct expr {
> struct {
> union mf_subvalue value;
> union mf_subvalue mask;
> + size_t mask_n_bits;
> };
> };
> } cmp;
> diff --git a/lib/expr.c b/lib/expr.c
> index 0a6f7e574..8fd07b2d5 100644
> --- a/lib/expr.c
> +++ b/lib/expr.c
> @@ -15,21 +15,23 @@
> */
>
> #include <config.h>
> +#include "bitmap.h"
> #include "byte-order.h"
> -#include "openvswitch/json.h"
> +#include "hmapx.h"
> #include "nx-match.h"
> #include "openvswitch/dynamic-string.h"
> +#include "openvswitch/json.h"
> #include "openvswitch/match.h"
> #include "openvswitch/ofp-actions.h"
> -#include "openvswitch/vlog.h"
> #include "openvswitch/shash.h"
> +#include "openvswitch/vlog.h"
> +#include "ovn-util.h"
> #include "ovn/expr.h"
> #include "ovn/lex.h"
> #include "ovn/logical-fields.h"
> #include "simap.h"
> #include "sset.h"
> #include "util.h"
> -#include "ovn-util.h"
>
> VLOG_DEFINE_THIS_MODULE(expr);
>
> @@ -2520,6 +2522,183 @@ crush_and_string(struct expr *expr, const struct
expr_symbol *symbol)
> return expr_fix(expr);
> }
>
> +static int
> +compare_cmps_3way(const struct expr *a, const struct expr *b)
> +{
> + ovs_assert(a->cmp.symbol == b->cmp.symbol);
> + if (!a->cmp.symbol->width) {
> + return strcmp(a->cmp.string, b->cmp.string);
> + } else if (a->cmp.mask_n_bits != b->cmp.mask_n_bits) {
> + return a->cmp.mask_n_bits < b->cmp.mask_n_bits ? -1 : 1;
> + } else {
> + int d = memcmp(&a->cmp.value, &b->cmp.value, sizeof
a->cmp.value);
> + if (!d) {
> + d = memcmp(&a->cmp.mask, &b->cmp.mask, sizeof a->cmp.mask);
> + }
> + return d;
> + }
> +}
> +
> +static int
> +compare_cmps_cb(const void *a_, const void *b_)
> +{
> + const struct expr *const *ap = a_;
> + const struct expr *const *bp = b_;
> + const struct expr *a = *ap;
> + const struct expr *b = *bp;
> + return compare_cmps_3way(a, b);
> +}
> +
> +/* Similar to mf_subvalue_intersect(), but only checks the possibility of
> + * intersection without producing a result. */
> +static bool
> +expr_bitmap_intersect_check(const unsigned long *a_value,
> + const unsigned long *a_mask,
> + const unsigned long *b_value,
> + const unsigned long *b_mask,
> + size_t bit_width)
> +{
> + for (size_t i = 0; i < bitmap_n_longs(bit_width); i++) {
> + if ((a_value[i] ^ b_value[i]) & (a_mask[i] & b_mask[i])) {
> + return false;
> + }
> + }
> + return true;
> +}
> +
> +/* This function expects an OR expression with already crushed sub
> + * expressions, so they are plain comparisons. Result is the same
> + * expression, but with unnecessary sub-expressions removed. */
> +static struct expr *
> +crush_or_supersets(struct expr *expr, const struct expr_symbol *symbol)
> +{
> + ovs_assert(expr->type == EXPR_T_OR);
> +
> + /* Calculate offset within subfield and a width that can be used
> + * in a bitmap. */
> + const size_t sz = CHAR_BIT * sizeof expr->cmp.value.be64[0];
> + const size_t bit_width = ROUND_UP(symbol->width, sz);
> + const size_t ofs = ARRAY_SIZE(expr->cmp.value.be64) - bit_width / sz;
> +
> + /* Sort subexpressions by number of bits in the mask, value and the
mask
> + * itself, to bring together duplicates and have expressions ordered
by
> + * mask sizes. */
> + size_t n = ovs_list_size(&expr->andor);
> + struct expr **subs = xmalloc(n * sizeof *subs);
> + size_t *next = xmalloc(n * sizeof *next);
I spent some time figuring out that the next array is used to keep track of
the next valid index within the subs array after processing the current
index. It helps in optimizing the traversal of the subs array by skipping
over the invalidated (or removed) sub-expressions during the elimination of
duplicates and supersets. I think it would be helpful to have a comment for
the readers.
> +
> + size_t i = 0, j, max_n_bits = 0;
> + struct expr *sub;
> + LIST_FOR_EACH (sub, node, &expr->andor) {
> + if (symbol->width) {
> + const unsigned long *sub_mask;
> +
> + sub_mask = (unsigned long *) &sub->cmp.mask.be64[ofs];
> + sub->cmp.mask_n_bits = bitmap_count1(sub_mask, bit_width);
> + max_n_bits = MAX(max_n_bits, sub->cmp.mask_n_bits);
> + }
> + next[i] = i + 1;
> + subs[i++] = sub;
> + }
> + ovs_assert(i == n);
> +
> + qsort(subs, n, sizeof *subs, compare_cmps_cb);
> +
> + /* Eliminate duplicates. */
> + size_t last = 0;
> + for (i = 1; i < n; i++) {
> + if (compare_cmps_3way(subs[last], subs[i])) {
> + next[last] = i;
> + last = i;
> + } else {
> + expr_destroy(subs[i]);
> + subs[i] = NULL;
> + }
> + }
> +
> + if (!symbol->width || symbol->level != EXPR_L_ORDINAL) {
> + /* Not a fully maskable field. Can't do much more. */
> + goto done;
> + }
> +
> + /* Build a mask size index. 'mask_index[n_bits]' is an index in
'subs',
> + * where expressions with 'n_bits' bits in mask start. */
> + size_t *mask_index, n_bits;
> + size_t index_size = (max_n_bits + 2) * sizeof *mask_index;
> +
> + mask_index = xmalloc(index_size);
> + /* Initialize to maximum unsigned values. */
> + memset(mask_index, 0xf, index_size);
0xf is a little weird here. It is not wrong, just as correct as using even
0x1, but I think using 0xff, the max value of a byte, would make it more
straightforward to understand.
> +
> + for (i = 0; i < n; i = next[i]) {
> + if (subs[i]) {
> + n_bits = subs[i]->cmp.mask_n_bits;
> + mask_index[n_bits] = MIN(mask_index[n_bits], i);
> + }
> + }
> +
> + /* Fill the gaps, so they point to an index with more bits. */
> + for (i = max_n_bits; i > 0; i--) {
> + mask_index[i] = MIN(mask_index[i], mask_index[i + 1]);
> + }
> +
> + /* Find and eliminate supersets. */
> + for (i = 0; i < n; i = next[i]) {
> + if (!subs[i]) {
> + continue;
> + }
> + /* 'subs' are sorted based on the number of bits in the mask.
> + * For an expression to be a subset, it has to have more bits. */
> + n_bits = subs[i]->cmp.mask_n_bits;
> + if (mask_index[n_bits + 1] > n) {
> + break;
> + }
> + for (j = mask_index[n_bits + 1]; j < n; j = next[j]) {
> + struct expr *a = subs[i], *b = subs[j];
> +
> + ovs_assert(i != j);
> + if (!b) {
> + continue;
> + }
> +
> + const unsigned long *a_value, *a_mask, *b_value, *b_mask;
> + a_value = (unsigned long *) &a->cmp.value.be64[ofs];
> + b_value = (unsigned long *) &b->cmp.value.be64[ofs];
> + a_mask = (unsigned long *) &a->cmp.mask.be64[ofs];
> + b_mask = (unsigned long *) &b->cmp.mask.be64[ofs];
> +
> + if (expr_bitmap_intersect_check(a_value, a_mask, b_value,
b_mask,
> + bit_width)
> + && bitmap_is_superset(b_mask, a_mask, bit_width)) {
> + /* 'a' is the same expression with a smaller mask. */
> + expr_destroy(subs[j]);
> + subs[j] = NULL;
> + }
> + /* Shorten the path for the next round. */
> + while (next[j] < n && !subs[next[j]]) {
Maybe not a big deal, but why not just update the next[last] when the
subs[j] is destroyed, which would be more straightforward and slightly more
efficient?
> + next[j] = next[next[j]];
> + }
> + }
> + }
I think the algorithm is very smart. However, as you mentioned in the
meeting today, the worst case of this nested loop can still be bad. For
example, if we have an address set with 1000 addresses (not very big), but
half of them are with the mask of /24, and the other half are with /32,
without any overlapping, then it would be O(500 * 500) time complexity.
This example may be too extreme, but in reality, I think we shouldn't be
surprised to see someone using an address set with 10 CIDRs of /24 together
with another 1000 individual IPs, and the algorithm would make it 10 times
slower than before. We should at least explicitly document this inefficient
use of address sets and remind users to avoid the pitfalls, and in turn,
document or optimize in CMS (k8s, openstack, ...) to avoid such usage.
Alternatively, we may still restrict the algorithm for negation expr
handling.
@Mark Michelson <[email protected]> mentioned he will review it, too. So
I'd like to hear his opinion.
Thanks,
Han
> + free(mask_index);
> +
> +done:
> + ovs_list_init(&expr->andor);
> + for (i = 0; i < n; i++) {
> + if (subs[i]) {
> + ovs_list_push_back(&expr->andor, &subs[i]->node);
> + } else {
> + /* Members modified, so untrack address set. */
> + free(expr->as_name);
> + expr->as_name = NULL;
> + }
> + }
> +
> + free(next);
> + free(subs);
> + return expr;
> +}
> +
> /* Implementation of crush_cmps() for expr->type == EXPR_T_AND and a
> * numeric-typed 'symbol'. */
> static struct expr *
> @@ -2679,31 +2858,6 @@ crush_and_numeric(struct expr *expr, const struct
expr_symbol *symbol)
> }
> }
>
> -static int
> -compare_cmps_3way(const struct expr *a, const struct expr *b)
> -{
> - ovs_assert(a->cmp.symbol == b->cmp.symbol);
> - if (!a->cmp.symbol->width) {
> - return strcmp(a->cmp.string, b->cmp.string);
> - } else {
> - int d = memcmp(&a->cmp.value, &b->cmp.value, sizeof
a->cmp.value);
> - if (!d) {
> - d = memcmp(&a->cmp.mask, &b->cmp.mask, sizeof a->cmp.mask);
> - }
> - return d;
> - }
> -}
> -
> -static int
> -compare_cmps_cb(const void *a_, const void *b_)
> -{
> - const struct expr *const *ap = a_;
> - const struct expr *const *bp = b_;
> - const struct expr *a = *ap;
> - const struct expr *b = *bp;
> - return compare_cmps_3way(a, b);
> -}
> -
> /* Implementation of crush_cmps() for expr->type == EXPR_T_OR. */
> static struct expr *
> crush_or(struct expr *expr, const struct expr_symbol *symbol)
> @@ -2723,34 +2877,8 @@ crush_or(struct expr *expr, const struct
expr_symbol *symbol)
> return expr;
> }
>
> - /* Sort subexpressions by value and mask, to bring together
duplicates. */
> - size_t n = ovs_list_size(&expr->andor);
> - struct expr **subs = xmalloc(n * sizeof *subs);
> -
> - size_t i = 0;
> - LIST_FOR_EACH (sub, node, &expr->andor) {
> - subs[i++] = sub;
> - }
> - ovs_assert(i == n);
> -
> - qsort(subs, n, sizeof *subs, compare_cmps_cb);
> + expr = crush_or_supersets(expr, symbol);
>
> - /* Eliminate duplicates. */
> - ovs_list_init(&expr->andor);
> - ovs_list_push_back(&expr->andor, &subs[0]->node);
> - for (i = 1; i < n; i++) {
> - struct expr *a = expr_from_node(ovs_list_back(&expr->andor));
> - struct expr *b = subs[i];
> - if (compare_cmps_3way(a, b)) {
> - ovs_list_push_back(&expr->andor, &b->node);
> - } else {
> - expr_destroy(b);
> - /* Member modified, so untrack address set. */
> - free(expr->as_name);
> - expr->as_name = NULL;
> - }
> - }
> - free(subs);
> return expr_fix(expr);
> }
>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index a892691ca..7dfe0df24 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -822,6 +822,18 @@ ip,nw_src=10.0.0.1,nw_dst=172.27.0.65
> ip,nw_src=10.0.0.2,nw_dst=172.27.0.65
> ip,nw_src=10.0.0.3,nw_dst=172.27.0.65
> ])
> +AT_CHECK([expr_to_flow 'ip4.src == 172.168.13.0/16 && ip4.src != {
172.168.13.0/24, 172.168.14.0/24}'], [0], [dnl
> +ip,nw_src=172.168.0.0/255.255.3.0
> +ip,nw_src=172.168.0.0/255.255.4.0
> +ip,nw_src=172.168.0.0/255.255.8.0
> +ip,nw_src=172.168.128.0/17
> +ip,nw_src=172.168.16.0/255.255.16.0
> +ip,nw_src=172.168.3.0/255.255.3.0
> +ip,nw_src=172.168.32.0/255.255.32.0
> +ip,nw_src=172.168.64.0/255.255.64.0
> +])
> +dnl Negative match flow explosion.
> +AT_CHECK([test $(expr_to_flow 'ip4.src != {172.168.13.0/24,
172.168.14.0/24, 172.168.15.0/24}' | wc -l) -le 30])
> AT_CLEANUP
>
> AT_SETUP([converting expressions to flows -- port groups])
> --
> 2.39.2
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev