Enhance MAC_Binding aging to allow CIDR-based threshold configurations.
This enables distinct threshold settings for different IP ranges,
applying the longest prefix matching for overlapping ranges.

A common use case involves setting a default threshold for all IPs, while
disabling aging for a specific range and potentially excluding a subset
within that range.

Signed-off-by: Han Zhou <hz...@ovn.org>
---
 northd/aging.c  | 297 +++++++++++++++++++++++++++++++++++++++++++++---
 northd/aging.h  |   3 +
 northd/northd.c |  11 +-
 ovn-nb.xml      |  63 +++++++++-
 tests/ovn.at    |  60 ++++++++++
 5 files changed, 413 insertions(+), 21 deletions(-)

diff --git a/northd/aging.c b/northd/aging.c
index f626c72c8ca3..e5868211a63b 100644
--- a/northd/aging.c
+++ b/northd/aging.c
@@ -47,12 +47,253 @@ aging_waker_schedule_next_wake(struct aging_waker *waker, 
int64_t next_wake_ms)
     }
 }
 
+struct threshold_entry {
+    union {
+        ovs_be32 ipv4;
+        struct in6_addr ipv6;
+    } prefix;
+    bool is_v4;
+    unsigned int plen;
+    unsigned int threshold;
+};
+
+/* Contains CIDR-based aging threshold configuration parsed from
+ * "Logical_Router:options:mac_binding_age_threshold".
+ *
+ * This struct is also used for non-CIDR-based threshold, e.g. the ones from
+ * "NB_Global:other_config:fdb_age_threshold" for the common aging_context
+ * interface.
+ *
+ * - The arrays `v4_entries` and `v6_entries` are populated with parsed entries
+ *   for IPv4 and IPv6 CIDRs, respectively, along with their associated
+ *   thresholds.  Entries within these arrays are sorted by prefix length,
+ *   starting with the longest.
+ *
+ * - If a threshold is provided without an accompanying prefix, it's captured
+ *   in `default_threshold`.  In cases with multiple unprefixed thresholds,
+ *   `default_threshold` will only store the last one.  */
+struct threshold_config {
+    struct threshold_entry *v4_entries;
+    int n_v4_entries;
+    struct threshold_entry *v6_entries;
+    int n_v6_entries;
+    unsigned int default_threshold;
+};
+
+static int
+compare_entries_by_prefix_length(const void *a, const void *b)
+{
+    const struct threshold_entry *entry_a = a;
+    const struct threshold_entry *entry_b = b;
+
+    return entry_b->plen - entry_a->plen;
+}
+
+/* Parse an ENTRY in the threshold option, with the format:
+ * [CIDR:]THRESHOLD
+ *
+ * Returns true if successful, false if failed. */
+static bool
+parse_threshold_entry(const char *str, struct threshold_entry *entry)
+{
+    char *colon_ptr;
+    unsigned int value;
+    const char *threshold_str;
+
+    colon_ptr = strrchr(str, ':');
+    if (!colon_ptr) {
+        threshold_str = str;
+        entry->plen = 0;
+    } else {
+        threshold_str = colon_ptr + 1;
+    }
+
+    if (!str_to_uint(threshold_str, 10, &value)) {
+        return false;
+    }
+    entry->threshold = value;
+
+    if (!colon_ptr) {
+        return true;
+    }
+
+    /* ":" was found, so parse the string before ":" as a cidr. */
+    char ip_cidr[128];
+    ovs_strzcpy(ip_cidr, str, MIN(colon_ptr - str + 1, sizeof ip_cidr));
+    char *error = ip_parse_cidr(ip_cidr, &entry->prefix.ipv4, &entry->plen);
+    if (!error) {
+        entry->is_v4 = true;
+        return true;
+    }
+    free(error);
+    error = ipv6_parse_cidr(ip_cidr, &entry->prefix.ipv6, &entry->plen);
+    if (!error) {
+        entry->is_v4 = false;
+        return true;
+    }
+    free(error);
+    return false;
+}
+
+static void
+threshold_config_destroy(struct threshold_config *config)
+{
+    free(config->v4_entries);
+    free(config->v6_entries);
+    config->v4_entries = config->v6_entries = NULL;
+    config->n_v4_entries = config->n_v6_entries = 0;
+    config->default_threshold = 0;
+}
+
+/* Parse the threshold option string, which has the format:
+ * ENTRY[;ENTRY[...]]
+ *
+ * For the exact format of ENTRY, refer to the function
+ * `parse_threshold_entry`.
+ *
+ * The parsed data is populated to the struct threshold_config.
+ * See the comments of struct threshold_config for details.
+ *
+ * Return Values:
+ * - Returns `false` if the input does not match the expected format.
+ *   Consequently, no entries will be populated.
+ * - Returns `true` upon successful parsing. The caller is responsible for
+ *   releasing the allocated memory by calling threshold_config_destroy. */
+static bool
+parse_aging_threshold(const char *opt,
+                      struct threshold_config *config)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+    struct threshold_entry e;
+    const char *start = opt, *end;
+    memset(config, 0, sizeof *config);
+
+    while (start && *start) {
+        end = strchr(start, ';');
+        if (end) {
+            /* Big enough for any <ipv6>/<ipv6 mask>:<threshold> */
+            char buf[128];
+            ovs_strzcpy(buf, start, MIN(end - start + 1, sizeof buf));
+            if (!parse_threshold_entry(buf, &e)) {
+                VLOG_WARN_RL(&rl, "Parsing aging threshold '%s' failed.", buf);
+                goto err;
+            }
+            start = end + 1;
+        } else {
+            if (!parse_threshold_entry(start, &e)) {
+                VLOG_WARN_RL(&rl, "Parsing aging threshold '%s' failed.",
+                             start);
+                goto err;
+            }
+            start += strlen(start);
+        }
+        if (!e.plen) {
+            config->default_threshold = e.threshold;
+        } else if (e.is_v4) {
+            config->n_v4_entries++;
+            config->v4_entries = xrealloc(config->v4_entries,
+                                          config->n_v4_entries * sizeof e);
+            config->v4_entries[config->n_v4_entries - 1] = e;
+        } else {
+            config->n_v6_entries++;
+            config->v6_entries = xrealloc(config->v6_entries,
+                                          config->n_v6_entries * sizeof e);
+            config->v6_entries[config->n_v6_entries - 1] = e;
+        }
+    }
+
+    if (config->n_v4_entries > 0) {
+        qsort(config->v4_entries, config->n_v4_entries, sizeof e,
+              compare_entries_by_prefix_length);
+    }
+
+    if (config->n_v6_entries > 0) {
+        qsort(config->v6_entries, config->n_v6_entries, sizeof e,
+              compare_entries_by_prefix_length);
+    }
+
+    return true;
+
+err:
+    threshold_config_destroy(config);
+    return false;
+}
+
+static unsigned int
+find_threshold_for_ip(const char *ip_str,
+                      const struct threshold_config *config)
+{
+    if (!ip_str) {
+        return config->default_threshold;
+    }
+
+    ovs_be32 ipv4;
+    struct in6_addr ipv6;
+    if (ip_parse(ip_str, &ipv4)) {
+        for (int i = 0; i < config->n_v4_entries; i++) {
+            ovs_be32 masked_ip = ipv4 &
+                be32_prefix_mask(config->v4_entries[i].plen);
+            if (masked_ip == config->v4_entries[i].prefix.ipv4) {
+                return config->v4_entries[i].threshold;
+            }
+        }
+    } else if (ipv6_parse(ip_str, &ipv6)) {
+        for (int i = 0; i < config->n_v6_entries; i++) {
+            struct in6_addr v6_mask =
+                ipv6_create_mask(config->v6_entries[i].plen);
+            struct in6_addr masked_ip = ipv6_addr_bitand(&ipv6, &v6_mask);
+            if (ipv6_addr_equals(&masked_ip,
+                                 &config->v6_entries[i].prefix.ipv6)) {
+                return config->v6_entries[i].threshold;
+            }
+        }
+    }
+    return config->default_threshold;
+}
+
+/* Parse the threshold option string (see the comment of the function
+ * parse_aging_threshold), and returns the smallest threshold. */
+unsigned int
+min_mac_binding_age_threshold(const char *opt)
+{
+    struct threshold_config config;
+    if (!parse_aging_threshold(opt, &config)) {
+        return 0;
+    }
+
+    unsigned int threshold = UINT_MAX;
+    unsigned int t;
+
+    for (int i = 0; i < config.n_v4_entries; i++) {
+        t = config.v4_entries[i].threshold;
+        if (t && t < threshold) {
+            threshold = t;
+        }
+    }
+
+    for (int i = 0; i < config.n_v6_entries; i++) {
+        t = config.v6_entries[i].threshold;
+        if (t && t < threshold) {
+            threshold = t;
+        }
+    }
+
+    t = config.default_threshold;
+    if (t && t < threshold) {
+        threshold = t;
+    }
+
+    threshold_config_destroy(&config);
+
+    return threshold == UINT_MAX ? 0 : threshold;
+}
+
 struct aging_context {
     int64_t next_wake_ms;
     int64_t time_wall_now;
     uint32_t removal_limit;
     uint32_t n_removed;
-    uint64_t threshold;
+    struct threshold_config *threshold;
 };
 
 static struct aging_context
@@ -63,13 +304,14 @@ aging_context_init(uint32_t removal_limit)
            .time_wall_now = time_wall_msec(),
            .removal_limit = removal_limit,
            .n_removed = 0,
-           .threshold = 0,
+           .threshold = NULL,
     };
     return ctx;
 }
 
 static void
-aging_context_set_threshold(struct aging_context *ctx, uint64_t threshold)
+aging_context_set_threshold(struct aging_context *ctx,
+                            struct threshold_config *threshold)
 {
     ctx->threshold = threshold;
 }
@@ -81,19 +323,27 @@ aging_context_is_at_limit(struct aging_context *ctx)
 }
 
 static bool
-aging_context_handle_timestamp(struct aging_context *ctx, int64_t timestamp)
+aging_context_handle_timestamp(struct aging_context *ctx, int64_t timestamp,
+                               const char *ip)
 {
     int64_t elapsed = ctx->time_wall_now - timestamp;
     if (elapsed < 0) {
         return false;
     }
 
-    if (elapsed >= ctx->threshold) {
+    ovs_assert(ctx->threshold);
+    uint64_t threshold = 1000 * find_threshold_for_ip(ip, ctx->threshold);
+
+    if (!threshold) {
+        return false;
+    }
+
+    if (elapsed >= threshold) {
         ctx->n_removed++;
         return true;
     }
 
-    ctx->next_wake_ms = MIN(ctx->next_wake_ms, (ctx->threshold - elapsed));
+    ctx->next_wake_ms = MIN(ctx->next_wake_ms, (threshold - elapsed));
     return false;
 }
 
@@ -121,13 +371,18 @@ mac_binding_aging_run_for_datapath(const struct 
sbrec_datapath_binding *dp,
         return;
     }
 
+    if (!ctx->threshold->n_v4_entries && !ctx->threshold->n_v6_entries
+        && !ctx->threshold->default_threshold) {
+        return;
+    }
+
     struct sbrec_mac_binding *mb_index_row =
         sbrec_mac_binding_index_init_row(mb_by_datapath);
     sbrec_mac_binding_index_set_datapath(mb_index_row, dp);
 
     const struct sbrec_mac_binding *mb;
     SBREC_MAC_BINDING_FOR_EACH_EQUAL (mb, mb_index_row, mb_by_datapath) {
-        if (aging_context_handle_timestamp(ctx, mb->timestamp)) {
+        if (aging_context_handle_timestamp(ctx, mb->timestamp, mb->ip)) {
             sbrec_mac_binding_delete(mb);
             if (aging_context_is_at_limit(ctx)) {
                 break;
@@ -166,13 +421,20 @@ en_mac_binding_aging_run(struct engine_node *node, void 
*data OVS_UNUSED)
             continue;
         }
 
-        uint64_t threshold = smap_get_uint(&od->nbr->options,
-                                           "mac_binding_age_threshold", 0);
-        aging_context_set_threshold(&ctx, threshold * 1000);
+        struct threshold_config threshold_config;
+        if (!parse_aging_threshold(smap_get(&od->nbr->options,
+                                            "mac_binding_age_threshold"),
+                                   &threshold_config)) {
+            return;
+        }
+
+        aging_context_set_threshold(&ctx, &threshold_config);
 
         mac_binding_aging_run_for_datapath(od->sb,
                                            sbrec_mac_binding_by_datapath,
                                            &ctx);
+        threshold_config_destroy(&threshold_config);
+
         if (aging_context_is_at_limit(&ctx)) {
             /* Schedule the next run after specified delay. */
             ctx.next_wake_ms = AGING_BULK_REMOVAL_DELAY_MSEC;
@@ -255,7 +517,7 @@ fdb_run_for_datapath(const struct sbrec_datapath_binding 
*dp,
 
     const struct sbrec_fdb *fdb;
     SBREC_FDB_FOR_EACH_EQUAL (fdb, fdb_index_row, fdb_by_dp_key) {
-        if (aging_context_handle_timestamp(ctx, fdb->timestamp)) {
+        if (aging_context_handle_timestamp(ctx, fdb->timestamp, NULL)) {
             sbrec_fdb_delete(fdb);
             if (aging_context_is_at_limit(ctx)) {
                 break;
@@ -293,11 +555,16 @@ en_fdb_aging_run(struct engine_node *node, void *data 
OVS_UNUSED)
             continue;
         }
 
-        uint64_t threshold =
-                smap_get_uint(&od->nbs->other_config, "fdb_age_threshold", 0);
-        aging_context_set_threshold(&ctx, threshold * 1000);
-
+        struct threshold_config threshold_config;
+        if (!parse_aging_threshold(smap_get(&od->nbs->other_config,
+                                            "fdb_age_threshold"),
+                                   &threshold_config)) {
+            return;
+        }
+        aging_context_set_threshold(&ctx, &threshold_config);
         fdb_run_for_datapath(od->sb, sbrec_fdb_by_dp_key, &ctx);
+        threshold_config_destroy(&threshold_config);
+
         if (aging_context_is_at_limit(&ctx)) {
             /* Schedule the next run after specified delay. */
             ctx.next_wake_ms = AGING_BULK_REMOVAL_DELAY_MSEC;
diff --git a/northd/aging.h b/northd/aging.h
index 650990e244aa..2cfe2fd44457 100644
--- a/northd/aging.h
+++ b/northd/aging.h
@@ -30,6 +30,9 @@ void *en_mac_binding_aging_waker_init(struct engine_node 
*node,
                                       struct engine_arg *arg);
 void en_mac_binding_aging_waker_cleanup(void *data);
 
+/* The MAC binding aging helper function. */
+unsigned int min_mac_binding_age_threshold(const char *opt);
+
 /* The FDB aging node functions. */
 void en_fdb_aging_run(struct engine_node *node, void *data);
 void *en_fdb_aging_init(struct engine_node *node, struct engine_arg *arg);
diff --git a/northd/northd.c b/northd/northd.c
index f8b046d83e85..4bc898982cf2 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -17,6 +17,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 
+#include "aging.h"
 #include "debug.h"
 #include "bitmap.h"
 #include "coverage.h"
@@ -1166,8 +1167,14 @@ ovn_datapath_update_external_ids(struct ovn_datapath *od)
             smap_add(&ids, "always_learn_from_arp_request", "false");
         }
 
-        uint32_t age_threshold = smap_get_uint(&od->nbr->options,
-                                               "mac_binding_age_threshold", 0);
+        /* For timestamp refreshing, the smallest threshold of the option is
+         * set to SB to make sure all entries are refreshed in time.
+         * XXX: This approach simplifies processing in ovn-controller, but it
+         * may be enhanced, if necessary, to parse the complete CIDR-based
+         * threshold configurations to SB to reduce unnecessary refreshes. */
+        uint32_t age_threshold = min_mac_binding_age_threshold(
+                                       smap_get(&od->nbr->options,
+                                               "mac_binding_age_threshold"));
         if (age_threshold) {
             smap_add_format(&ids, "mac_binding_age_threshold",
                             "%u", age_threshold);
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 1de0c30416ce..80733273484e 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -2686,10 +2686,65 @@ or
       </column>
 
       <column name="options" key="mac_binding_age_threshold"
-              type='{"type": "integer", "minInteger": 0, "maxInteger": 
4294967295}'>
-        MAC binding aging <code>threshold</code> value in seconds. MAC binding
-        exceeding this timeout will be automatically removed. The value
-        defaults to 0, which means disabled.
+              type='{"type": "string"}'>
+        <p>
+            Specifies the MAC binding aging thresholds based on CIDRs, with the
+            format: <var>entry</var>[<code>;</code><var>entry</var>[...]],
+            where each <var>entry</var> has the format:
+            [<var>cidr</var><code>:</code>]<var>threshold</var>
+        </p>
+
+        <ul>
+          <li>
+            <var>cidr</var>: Can be either an IPv4 or IPv6 CIDR.
+          </li>
+          <li>
+            <var>threshold</var>: Threshold value in seconds. MAC bindings with
+            IP addresses matching the specified CIDR that exceed this timeout
+            will be automatically removed.
+          </li>
+        </ul>
+
+        <p>
+            If an <var>entry</var> is provided without an CIDR (just the
+            threshold value), it specifies the default threshold for MAC
+            bindings that don't match any of the given CIDRs. If there are
+            multiple default threshold entries in the option, the behavior is
+            undefined.
+        </p>
+
+        <p>
+            If there are multiple CIDRs matching a MAC binding IP, the one with
+            the longest prefix length takes effect. If there are multiple
+            entries with the same CIDR in the option, the behavior is
+            undefined.
+        </p>
+
+        <p>
+            If no matching CIDR is found for a MAC binding IP, and no default
+            threshold is specified, the behavior defaults to the original: the
+            binding will not be removed based on age.
+        </p>
+
+        <p>
+            The value can also default to an empty string, which means that the
+            aging threshold is disabled. Any string not in the above format is
+            regarded as invalid and the aging is disabled.
+        </p>
+
+        <p>
+            Example:
+            
<code>192.168.0.0/16:300;192.168.10.0/24:0;fe80::/10:600;1200</code>
+        </p>
+
+        <p>
+            This sets a threshold of 300 seconds for MAC bindings with IP
+            addresses in the 192.168.0.0/16 range, excluding the 192.168.1.0/24
+            range (for which the aging is disabled), a threshold of 600 seconds
+            for MAC bindings with IP addresses in the fe80::/10 IPv6 range, and
+            a default threshold of 1200 seconds for all other MAC bindings.
+        </p>
+
       </column>
     </group>
 
diff --git a/tests/ovn.at b/tests/ovn.at
index 637d92bedbc7..07b5c1ec57fc 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -34676,6 +34676,66 @@ OVS_WAIT_UNTIL([
 
 AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=79 --no-stats | 
strip_cookie], [0], [])
 
+# Test CIDR-based threshold configuration
+check ovn-nbctl set logical_router gw 
options:mac_binding_age_threshold="192.168.10.0/255.255.255.0:2;192.168.10.64/26:0;192.168.10.20:0"
+check ovn-nbctl --wait=sb sync
+uuid=$(fetch_column datapath _uuid external_ids:name=gw)
+AT_CHECK([ovn-sbctl get datapath $uuid 
external_ids:mac_binding_age_threshold], [0], [dnl
+"2"
+])
+
+# Send GARP to populate MAC binding table records
+send_garp hv1 ext1 10 # belong to 192.168.10.0/24
+send_garp hv2 ext2 20 # belong to 192.168.10.20/32
+send_garp hv2 ext2 65 # belong to 192.168.10.64/26
+
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.10"])
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.20"])
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.65"])
+
+OVS_WAIT_UNTIL([
+    test "0" = "$(ovn-sbctl list mac_binding | grep -c '192.168.10.10')"
+])
+# The other two should remain because the corresponding prefixes have 
threshold 0
+AT_CHECK([ovn-sbctl list mac_binding | grep -q "192.168.10.20"])
+AT_CHECK([ovn-sbctl list mac_binding | grep -q "192.168.10.65"])
+check ovn-sbctl --all destroy mac_binding
+
+# Set the aging threshold mixed with IPv6 prefixes and default threshold
+check ovn-nbctl set logical_router gw 
options:mac_binding_age_threshold="1;192.168.10.64/26:0;ff00:1234::/32:888"
+check ovn-nbctl --wait=sb sync
+uuid=$(fetch_column datapath _uuid external_ids:name=gw)
+AT_CHECK([ovn-sbctl get datapath $uuid 
external_ids:mac_binding_age_threshold], [0], [dnl
+"1"
+])
+
+# Send GARP to populate MAC binding table records
+send_garp hv1 ext1 10 # belong to default
+send_garp hv2 ext2 65 # belong to 192.168.10.64/26
+
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.10"])
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.65"])
+
+OVS_WAIT_UNTIL([
+    test "0" = "$(ovn-sbctl list mac_binding | grep -c '192.168.10.10')"
+])
+AT_CHECK([ovn-sbctl list mac_binding | grep -q "192.168.10.65"])
+check ovn-sbctl --all destroy mac_binding
+
+# Set the aging threshold with invalid format
+check ovn-nbctl set logical_router gw 
options:mac_binding_age_threshold="1;abc/26:0"
+check ovn-nbctl --wait=sb sync
+uuid=$(fetch_column datapath _uuid external_ids:name=gw)
+AT_CHECK([ovn-sbctl get datapath $uuid 
external_ids:mac_binding_age_threshold], [1], [ignore], [ignore])
+
+# Send GARP to populate MAC binding table records
+send_garp hv1 ext1 10
+
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "192.168.10.10"])
+# The record is not deleted
+sleep 5
+AT_CHECK([ovn-sbctl list mac_binding | grep -q "192.168.10.10"])
+
 OVN_CLEANUP([hv1], [hv2])
 AT_CLEANUP
 ])
-- 
2.38.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to