Enhance MAC_Binding aging to allow CIDR-based threshold configurations. This enables distinct threshold settings for different IP ranges, applying the longest prefix matching for overlapping ranges.
A common use case involves setting a default threshold for all IPs, while disabling aging for a specific range and potentially excluding a subset within that range. Signed-off-by: Han Zhou <hz...@ovn.org> --- northd/aging.c | 297 +++++++++++++++++++++++++++++++++++++++++++++--- northd/aging.h | 3 + northd/northd.c | 11 +- ovn-nb.xml | 63 +++++++++- tests/ovn.at | 60 ++++++++++ 5 files changed, 413 insertions(+), 21 deletions(-) diff --git a/northd/aging.c b/northd/aging.c index f626c72c8ca3..e5868211a63b 100644 --- a/northd/aging.c +++ b/northd/aging.c @@ -47,12 +47,253 @@ aging_waker_schedule_next_wake(struct aging_waker *waker, int64_t next_wake_ms) } } +struct threshold_entry { + union { + ovs_be32 ipv4; + struct in6_addr ipv6; + } prefix; + bool is_v4; + unsigned int plen; + unsigned int threshold; +}; + +/* Contains CIDR-based aging threshold configuration parsed from + * "Logical_Router:options:mac_binding_age_threshold". + * + * This struct is also used for non-CIDR-based threshold, e.g. the ones from + * "NB_Global:other_config:fdb_age_threshold" for the common aging_context + * interface. + * + * - The arrays `v4_entries` and `v6_entries` are populated with parsed entries + * for IPv4 and IPv6 CIDRs, respectively, along with their associated + * thresholds. Entries within these arrays are sorted by prefix length, + * starting with the longest. + * + * - If a threshold is provided without an accompanying prefix, it's captured + * in `default_threshold`. In cases with multiple unprefixed thresholds, + * `default_threshold` will only store the last one. */ +struct threshold_config { + struct threshold_entry *v4_entries; + int n_v4_entries; + struct threshold_entry *v6_entries; + int n_v6_entries; + unsigned int default_threshold; +}; + +static int +compare_entries_by_prefix_length(const void *a, const void *b) +{ + const struct threshold_entry *entry_a = a; + const struct threshold_entry *entry_b = b; + + return entry_b->plen - entry_a->plen; +} + +/* Parse an ENTRY in the threshold option, with the format: + * [CIDR:]THRESHOLD + * + * Returns true if successful, false if failed. */ +static bool +parse_threshold_entry(const char *str, struct threshold_entry *entry) +{ + char *colon_ptr; + unsigned int value; + const char *threshold_str; + + colon_ptr = strrchr(str, ':'); + if (!colon_ptr) { + threshold_str = str; + entry->plen = 0; + } else { + threshold_str = colon_ptr + 1; + } + + if (!str_to_uint(threshold_str, 10, &value)) { + return false; + } + entry->threshold = value; + + if (!colon_ptr) { + return true; + } + + /* ":" was found, so parse the string before ":" as a cidr. */ + char ip_cidr[128]; + ovs_strzcpy(ip_cidr, str, MIN(colon_ptr - str + 1, sizeof ip_cidr)); + char *error = ip_parse_cidr(ip_cidr, &entry->prefix.ipv4, &entry->plen); + if (!error) { + entry->is_v4 = true; + return true; + } + free(error); + error = ipv6_parse_cidr(ip_cidr, &entry->prefix.ipv6, &entry->plen); + if (!error) { + entry->is_v4 = false; + return true; + } + free(error); + return false; +} + +static void +threshold_config_destroy(struct threshold_config *config) +{ + free(config->v4_entries); + free(config->v6_entries); + config->v4_entries = config->v6_entries = NULL; + config->n_v4_entries = config->n_v6_entries = 0; + config->default_threshold = 0; +} + +/* Parse the threshold option string, which has the format: + * ENTRY[;ENTRY[...]] + * + * For the exact format of ENTRY, refer to the function + * `parse_threshold_entry`. + * + * The parsed data is populated to the struct threshold_config. + * See the comments of struct threshold_config for details. + * + * Return Values: + * - Returns `false` if the input does not match the expected format. + * Consequently, no entries will be populated. + * - Returns `true` upon successful parsing. The caller is responsible for + * releasing the allocated memory by calling threshold_config_destroy. */ +static bool +parse_aging_threshold(const char *opt, + struct threshold_config *config) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + struct threshold_entry e; + const char *start = opt, *end; + memset(config, 0, sizeof *config); + + while (start && *start) { + end = strchr(start, ';'); + if (end) { + /* Big enough for any <ipv6>/<ipv6 mask>:<threshold> */ + char buf[128]; + ovs_strzcpy(buf, start, MIN(end - start + 1, sizeof buf)); + if (!parse_threshold_entry(buf, &e)) { + VLOG_WARN_RL(&rl, "Parsing aging threshold '%s' failed.", buf); + goto err; + } + start = end + 1; + } else { + if (!parse_threshold_entry(start, &e)) { + VLOG_WARN_RL(&rl, "Parsing aging threshold '%s' failed.", + start); + goto err; + } + start += strlen(start); + } + if (!e.plen) { + config->default_threshold = e.threshold; + } else if (e.is_v4) { + config->n_v4_entries++; + config->v4_entries = xrealloc(config->v4_entries, + config->n_v4_entries * sizeof e); + config->v4_entries[config->n_v4_entries - 1] = e; + } else { + config->n_v6_entries++; + config->v6_entries = xrealloc(config->v6_entries, + config->n_v6_entries * sizeof e); + config->v6_entries[config->n_v6_entries - 1] = e; + } + } + + if (config->n_v4_entries > 0) { + qsort(config->v4_entries, config->n_v4_entries, sizeof e, + compare_entries_by_prefix_length); + } + + if (config->n_v6_entries > 0) { + qsort(config->v6_entries, config->n_v6_entries, sizeof e, + compare_entries_by_prefix_length); + } + + return true; + +err: + threshold_config_destroy(config); + return false; +} + +static unsigned int +find_threshold_for_ip(const char *ip_str, + const struct threshold_config *config) +{ + if (!ip_str) { + return config->default_threshold; + } + + ovs_be32 ipv4; + struct in6_addr ipv6; + if (ip_parse(ip_str, &ipv4)) { + for (int i = 0; i < config->n_v4_entries; i++) { + ovs_be32 masked_ip = ipv4 & + be32_prefix_mask(config->v4_entries[i].plen); + if (masked_ip == config->v4_entries[i].prefix.ipv4) { + return config->v4_entries[i].threshold; + } + } + } else if (ipv6_parse(ip_str, &ipv6)) { + for (int i = 0; i < config->n_v6_entries; i++) { + struct in6_addr v6_mask = + ipv6_create_mask(config->v6_entries[i].plen); + struct in6_addr masked_ip = ipv6_addr_bitand(&ipv6, &v6_mask); + if (ipv6_addr_equals(&masked_ip, + &config->v6_entries[i].prefix.ipv6)) { + return config->v6_entries[i].threshold; + } + } + } + return config->default_threshold; +} + +/* Parse the threshold option string (see the comment of the function + * parse_aging_threshold), and returns the smallest threshold. */ +unsigned int +min_mac_binding_age_threshold(const char *opt) +{ + struct threshold_config config; + if (!parse_aging_threshold(opt, &config)) { + return 0; + } + + unsigned int threshold = UINT_MAX; + unsigned int t; + + for (int i = 0; i < config.n_v4_entries; i++) { + t = config.v4_entries[i].threshold; + if (t && t < threshold) { + threshold = t; + } + } + + for (int i = 0; i < config.n_v6_entries; i++) { + t = config.v6_entries[i].threshold; + if (t && t < threshold) { + threshold = t; + } + } + + t = config.default_threshold; + if (t && t < threshold) { + threshold = t; + } + + threshold_config_destroy(&config); + + return threshold == UINT_MAX ? 0 : threshold; +} + struct aging_context { int64_t next_wake_ms; int64_t time_wall_now; uint32_t removal_limit; uint32_t n_removed; - uint64_t threshold; + struct threshold_config *threshold; }; static struct aging_context @@ -63,13 +304,14 @@ aging_context_init(uint32_t removal_limit) .time_wall_now = time_wall_msec(), .removal_limit = removal_limit, .n_removed = 0, - .threshold = 0, + .threshold = NULL, }; return ctx; } static void -aging_context_set_threshold(struct aging_context *ctx, uint64_t threshold) +aging_context_set_threshold(struct aging_context *ctx, + struct threshold_config *threshold) { ctx->threshold = threshold; } @@ -81,19 +323,27 @@ aging_context_is_at_limit(struct aging_context *ctx) } static bool -aging_context_handle_timestamp(struct aging_context *ctx, int64_t timestamp) +aging_context_handle_timestamp(struct aging_context *ctx, int64_t timestamp, + const char *ip) { int64_t elapsed = ctx->time_wall_now - timestamp; if (elapsed < 0) { return false; } - if (elapsed >= ctx->threshold) { + ovs_assert(ctx->threshold); + uint64_t threshold = 1000 * find_threshold_for_ip(ip, ctx->threshold); + + if (!threshold) { + return false; + } + + if (elapsed >= threshold) { ctx->n_removed++; return true; } - ctx->next_wake_ms = MIN(ctx->next_wake_ms, (ctx->threshold - elapsed)); + ctx->next_wake_ms = MIN(ctx->next_wake_ms, (threshold - elapsed)); return false; } @@ -121,13 +371,18 @@ mac_binding_aging_run_for_datapath(const struct sbrec_datapath_binding *dp, return; } + if (!ctx->threshold->n_v4_entries && !ctx->threshold->n_v6_entries + && !ctx->threshold->default_threshold) { + return; + } + struct sbrec_mac_binding *mb_index_row = sbrec_mac_binding_index_init_row(mb_by_datapath); sbrec_mac_binding_index_set_datapath(mb_index_row, dp); const struct sbrec_mac_binding *mb; SBREC_MAC_BINDING_FOR_EACH_EQUAL (mb, mb_index_row, mb_by_datapath) { - if (aging_context_handle_timestamp(ctx, mb->timestamp)) { + if (aging_context_handle_timestamp(ctx, mb->timestamp, mb->ip)) { sbrec_mac_binding_delete(mb); if (aging_context_is_at_limit(ctx)) { break; @@ -166,13 +421,20 @@ en_mac_binding_aging_run(struct engine_node *node, void *data OVS_UNUSED) continue; } - uint64_t threshold = smap_get_uint(&od->nbr->options, - "mac_binding_age_threshold", 0); - aging_context_set_threshold(&ctx, threshold * 1000); + struct threshold_config threshold_config; + if (!parse_aging_threshold(smap_get(&od->nbr->options, + "mac_binding_age_threshold"), + &threshold_config)) { + return; + } + + aging_context_set_threshold(&ctx, &threshold_config); mac_binding_aging_run_for_datapath(od->sb, sbrec_mac_binding_by_datapath, &ctx); + threshold_config_destroy(&threshold_config); + if (aging_context_is_at_limit(&ctx)) { /* Schedule the next run after specified delay. */ ctx.next_wake_ms = AGING_BULK_REMOVAL_DELAY_MSEC; @@ -255,7 +517,7 @@ fdb_run_for_datapath(const struct sbrec_datapath_binding *dp, const struct sbrec_fdb *fdb; SBREC_FDB_FOR_EACH_EQUAL (fdb, fdb_index_row, fdb_by_dp_key) { - if (aging_context_handle_timestamp(ctx, fdb->timestamp)) { + if (aging_context_handle_timestamp(ctx, fdb->timestamp, NULL)) { sbrec_fdb_delete(fdb); if (aging_context_is_at_limit(ctx)) { break; @@ -293,11 +555,16 @@ en_fdb_aging_run(struct engine_node *node, void *data OVS_UNUSED) continue; } - uint64_t threshold = - smap_get_uint(&od->nbs->other_config, "fdb_age_threshold", 0); - aging_context_set_threshold(&ctx, threshold * 1000); - + struct threshold_config threshold_config; + if (!parse_aging_threshold(smap_get(&od->nbs->other_config, + "fdb_age_threshold"), + &threshold_config)) { + return; + } + aging_context_set_threshold(&ctx, &threshold_config); fdb_run_for_datapath(od->sb, sbrec_fdb_by_dp_key, &ctx); + threshold_config_destroy(&threshold_config); + if (aging_context_is_at_limit(&ctx)) { /* Schedule the next run after specified delay. */ ctx.next_wake_ms = AGING_BULK_REMOVAL_DELAY_MSEC; diff --git a/northd/aging.h b/northd/aging.h index 650990e244aa..2cfe2fd44457 100644 --- a/northd/aging.h +++ b/northd/aging.h @@ -30,6 +30,9 @@ void *en_mac_binding_aging_waker_init(struct engine_node *node, struct engine_arg *arg); void en_mac_binding_aging_waker_cleanup(void *data); +/* The MAC binding aging helper function. */ +unsigned int min_mac_binding_age_threshold(const char *opt); + /* The FDB aging node functions. */ void en_fdb_aging_run(struct engine_node *node, void *data); void *en_fdb_aging_init(struct engine_node *node, struct engine_arg *arg); diff --git a/northd/northd.c b/northd/northd.c index f8b046d83e85..4bc898982cf2 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -17,6 +17,7 @@ #include <stdlib.h> #include <stdio.h> +#include "aging.h" #include "debug.h" #include "bitmap.h" #include "coverage.h" @@ -1166,8 +1167,14 @@ ovn_datapath_update_external_ids(struct ovn_datapath *od) smap_add(&ids, "always_learn_from_arp_request", "false"); } - uint32_t age_threshold = smap_get_uint(&od->nbr->options, - "mac_binding_age_threshold", 0); + /* For timestamp refreshing, the smallest threshold of the option is + * set to SB to make sure all entries are refreshed in time. + * XXX: This approach simplifies processing in ovn-controller, but it + * may be enhanced, if necessary, to parse the complete CIDR-based + * threshold configurations to SB to reduce unnecessary refreshes. */ + uint32_t age_threshold = min_mac_binding_age_threshold( + smap_get(&od->nbr->options, + "mac_binding_age_threshold")); if (age_threshold) { smap_add_format(&ids, "mac_binding_age_threshold", "%u", age_threshold); diff --git a/ovn-nb.xml b/ovn-nb.xml index 1de0c30416ce..80733273484e 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -2686,10 +2686,65 @@ or </column> <column name="options" key="mac_binding_age_threshold" - type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'> - MAC binding aging <code>threshold</code> value in seconds. MAC binding - exceeding this timeout will be automatically removed. The value - defaults to 0, which means disabled. + type='{"type": "string"}'> + <p> + Specifies the MAC binding aging thresholds based on CIDRs, with the + format: <var>entry</var>[<code>;</code><var>entry</var>[...]], + where each <var>entry</var> has the format: + [<var>cidr</var><code>:</code>]<var>threshold</var> + </p> + + <ul> + <li> + <var>cidr</var>: Can be either an IPv4 or IPv6 CIDR. + </li> + <li> + <var>threshold</var>: Threshold value in seconds. MAC bindings with + IP addresses matching the specified CIDR that exceed this timeout + will be automatically removed. + </li> + </ul> + + <p> + If an <var>entry</var> is provided without an CIDR (just the + threshold value), it specifies the default threshold for MAC + bindings that don't match any of the given CIDRs. If there are + multiple default threshold entries in the option, the behavior is + undefined. + </p> + + <p> + If there are multiple CIDRs matching a MAC binding IP, the one with + the longest prefix length takes effect. If there are multiple + entries with the same CIDR in the option, the behavior is + undefined. + </p> + + <p> + If no matching CIDR is found for a MAC binding IP, and no default + threshold is specified, the behavior defaults to the original: the + binding will not be removed based on age. + </p> + + <p> + The value can also default to an empty string, which means that the + aging threshold is disabled. Any string not in the above format is + regarded as invalid and the aging is disabled. + </p> + + <p> + Example: + <code>192.168.0.0/16:300;192.168.10.0/24:0;fe80::/10:600;1200</code> + </p> + + <p> + This sets a threshold of 300 seconds for MAC bindings with IP + addresses in the 192.168.0.0/16 range, excluding the 192.168.1.0/24 + range (for which the aging is disabled), a threshold of 600 seconds + for MAC bindings with IP addresses in the fe80::/10 IPv6 range, and + a default threshold of 1200 seconds for all other MAC bindings. + </p> + </column> </group> diff --git a/tests/ovn.at b/tests/ovn.at index 637d92bedbc7..07b5c1ec57fc 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -34676,6 +34676,66 @@ OVS_WAIT_UNTIL([ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=79 --no-stats | strip_cookie], [0], []) +# Test CIDR-based threshold configuration +check ovn-nbctl set logical_router gw options:mac_binding_age_threshold="192.168.10.0/255.255.255.0:2;192.168.10.64/26:0;192.168.10.20:0" +check ovn-nbctl --wait=sb sync +uuid=$(fetch_column datapath _uuid external_ids:name=gw) +AT_CHECK([ovn-sbctl get datapath $uuid external_ids:mac_binding_age_threshold], [0], [dnl +"2" +]) + +# Send GARP to populate MAC binding table records +send_garp hv1 ext1 10 # belong to 192.168.10.0/24 +send_garp hv2 ext2 20 # belong to 192.168.10.20/32 +send_garp hv2 ext2 65 # belong to 192.168.10.64/26 + +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.10"]) +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.20"]) +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.65"]) + +OVS_WAIT_UNTIL([ + test "0" = "$(ovn-sbctl list mac_binding | grep -c '192.168.10.10')" +]) +# The other two should remain because the corresponding prefixes have threshold 0 +AT_CHECK([ovn-sbctl list mac_binding | grep -q "192.168.10.20"]) +AT_CHECK([ovn-sbctl list mac_binding | grep -q "192.168.10.65"]) +check ovn-sbctl --all destroy mac_binding + +# Set the aging threshold mixed with IPv6 prefixes and default threshold +check ovn-nbctl set logical_router gw options:mac_binding_age_threshold="1;192.168.10.64/26:0;ff00:1234::/32:888" +check ovn-nbctl --wait=sb sync +uuid=$(fetch_column datapath _uuid external_ids:name=gw) +AT_CHECK([ovn-sbctl get datapath $uuid external_ids:mac_binding_age_threshold], [0], [dnl +"1" +]) + +# Send GARP to populate MAC binding table records +send_garp hv1 ext1 10 # belong to default +send_garp hv2 ext2 65 # belong to 192.168.10.64/26 + +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.10"]) +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.65"]) + +OVS_WAIT_UNTIL([ + test "0" = "$(ovn-sbctl list mac_binding | grep -c '192.168.10.10')" +]) +AT_CHECK([ovn-sbctl list mac_binding | grep -q "192.168.10.65"]) +check ovn-sbctl --all destroy mac_binding + +# Set the aging threshold with invalid format +check ovn-nbctl set logical_router gw options:mac_binding_age_threshold="1;abc/26:0" +check ovn-nbctl --wait=sb sync +uuid=$(fetch_column datapath _uuid external_ids:name=gw) +AT_CHECK([ovn-sbctl get datapath $uuid external_ids:mac_binding_age_threshold], [1], [ignore], [ignore]) + +# Send GARP to populate MAC binding table records +send_garp hv1 ext1 10 + +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.10"]) +# The record is not deleted +sleep 5 +AT_CHECK([ovn-sbctl list mac_binding | grep -q "192.168.10.10"]) + OVN_CLEANUP([hv1], [hv2]) AT_CLEANUP ]) -- 2.38.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev