Otherwise the S_ROUTER_IN_IP_INPUT aggregated flows that reply to ARP requests targeting load balancer VIPs get completely regenerated every time a new VIP/LB is added. This affects SB memory usage as RAFT log entries are huge. Use an address set instead. Updating an address set is incremental, because it's performed with a "mutate" operation.
On a large scale ovn-kubernetes deployment with a high number of load balancers (services) this change reduces memory usage of ovsdb-servers running the OVN_Southbound cluster by 50%, from ~2GB RSS to ~1GB RSS. Signed-off-by: Dumitru Ceara <[email protected]> (cherry picked from commit c1e3896c0a393c035f301db6c1a8431adda57eb0) --- northd/northd.c | 61 +++++++++++++++++++++++++++++++++------------------ tests/ovn-northd.at | 44 +++++++++++++++++-------------------- 2 files changed, 59 insertions(+), 46 deletions(-) diff --git a/northd/northd.c b/northd/northd.c index aa1040628..502c263cc 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -11967,39 +11967,28 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op, op->cr_port->json_key); } - struct ds load_balancer_ips_v4 = DS_EMPTY_INITIALIZER; - - /* For IPv4 we can just create one rule with all required IPs. */ - ds_put_cstr(&load_balancer_ips_v4, "{ "); - ds_put_and_free_cstr(&load_balancer_ips_v4, - sset_join(&op->od->lb_ips_v4, ", ", " }")); - - build_lrouter_arp_flow(op->od, op, ds_cstr(&load_balancer_ips_v4), + /* Create a single ARP rule for all IPs that are used as VIPs. */ + char *lb_ips_v4_as = xasprintf("$%s_lb_ip4", op->od->nbr->name); + build_lrouter_arp_flow(op->od, op, lb_ips_v4_as, REG_INPORT_ETH_ADDR, match, false, 90, NULL, lflows); - ds_destroy(&load_balancer_ips_v4); + free(lb_ips_v4_as); } if (sset_count(&op->od->lb_ips_v6)) { ds_clear(match); - ds_clear(actions); - - struct ds load_balancer_ips_v6 = DS_EMPTY_INITIALIZER; - - ds_put_cstr(&load_balancer_ips_v6, "{ "); - ds_put_and_free_cstr(&load_balancer_ips_v6, - sset_join(&op->od->lb_ips_v6, ", ", " }")); if (is_l3dgw_port(op)) { ds_put_format(match, "is_chassis_resident(%s)", op->cr_port->json_key); } - build_lrouter_nd_flow(op->od, op, "nd_na", - ds_cstr(&load_balancer_ips_v6), NULL, + + /* Create a single ND rule for all IPs that are used as VIPs. */ + char *lb_ips_v6_as = xasprintf("$%s_lb_ip6", op->od->nbr->name); + build_lrouter_nd_flow(op->od, op, "nd_na", lb_ips_v6_as, NULL, REG_INPORT_ETH_ADDR, match, false, 90, NULL, lflows, meter_groups); - - ds_destroy(&load_balancer_ips_v6); + free(lb_ips_v6_as); } if (!op->od->is_gw_router && !op->od->n_l3dgw_ports) { @@ -13571,7 +13560,7 @@ sync_address_set(struct northd_context *ctx, const char *name, * in OVN_Northbound, so that the address sets used in Logical_Flows in * OVN_Southbound is checked against the proper set.*/ static void -sync_address_sets(struct northd_context *ctx) +sync_address_sets(struct northd_context *ctx, struct hmap *datapaths) { struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets); @@ -13618,6 +13607,34 @@ sync_address_sets(struct northd_context *ctx) svec_destroy(&ipv6_addrs); } + /* Sync router load balancer VIP generated address sets. */ + struct ovn_datapath *od; + HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbr) { + continue; + } + + if (sset_count(&od->lb_ips_v4)) { + char *ipv4_addrs_name = xasprintf("%s_lb_ip4", od->nbr->name); + const char **ipv4_addrs = sset_array(&od->lb_ips_v4); + + sync_address_set(ctx, ipv4_addrs_name, ipv4_addrs, + sset_count(&od->lb_ips_v4), &sb_address_sets); + free(ipv4_addrs_name); + free(ipv4_addrs); + } + + if (sset_count(&od->lb_ips_v6)) { + char *ipv6_addrs_name = xasprintf("%s_lb_ip6", od->nbr->name); + const char **ipv6_addrs = sset_array(&od->lb_ips_v6); + + sync_address_set(ctx, ipv6_addrs_name, ipv6_addrs, + sset_count(&od->lb_ips_v6), &sb_address_sets); + free(ipv6_addrs_name); + free(ipv6_addrs); + } + } + /* sync user defined address sets, which may overwrite port group * generated address sets if same name is used */ const struct nbrec_address_set *nb_address_set; @@ -14358,7 +14375,7 @@ ovnnb_db_run(struct northd_context *ctx, stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec()); ovn_update_ipv6_prefix(ports); - sync_address_sets(ctx); + sync_address_sets(ctx, datapaths); sync_port_groups(ctx, &port_groups); sync_meters(ctx, &meter_groups); sync_dns_entries(ctx, datapaths); diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 91d90b28e..66cd03bf2 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -1658,8 +1658,8 @@ ovn-nbctl lb-add lb1 "192.168.2.1:8080" "10.0.0.4:8080" ovn-nbctl lb-add lb2 "192.168.2.4:8080" "10.0.0.5:8080" udp ovn-nbctl lb-add lb3 "192.168.2.5:8080" "10.0.0.6:8080" ovn-nbctl lb-add lb4 "192.168.2.6:8080" "10.0.0.7:8080" -ovn-nbctl lb-add lb5 "fe80::200:ff:fe00:101:8080" "fe02::200:ff:fe00:101:8080" -ovn-nbctl lb-add lb5 "fe80::200:ff:fe00:102:8080" "fe02::200:ff:fe00:102:8080" +ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:101]]:8080" "[[fe02::200:ff:fe00:101]]:8080" +ovn-nbctl lb-add lb5 "[[fe80::200:ff:fe00:102]]:8080" "[[fe02::200:ff:fe00:102]]:8080" ovn-nbctl lr-lb-add lr lb1 ovn-nbctl lr-lb-add lr lb2 @@ -1669,6 +1669,10 @@ ovn-nbctl lr-lb-add lr lb5 ovn-nbctl --wait=sb sync +# Check generated VIP address sets. +check_column '192.168.2.1 192.168.2.4 192.168.2.5 192.168.2.6' Address_Set addresses name=lr_lb_ip4 +check_column 'fe80::200:ff:fe00:101 fe80::200:ff:fe00:102' Address_Set addresses name=lr_lb_ip6 + # Ingress router port ETH address is stored in lr_in_admission. AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_admission.*xreg0\[[0..47\]]" | sort], [0], [dnl table=0 (lr_in_admission ), priority=50 , dnl @@ -1685,16 +1689,8 @@ match=(eth.mcast && inport == "lrp-public"), dnl action=(xreg0[[0..47]] = 00:00:00:00:01:00; next;) ]) -# The order of the VIP addresses in the flow table entries doesn't -# matter, so just replace each of them with a generic $vip for -# testing. It would be better if we could ensure each one appeared -# exactly once, but that's hard with sed. -sed_vips() { - sed 's/192\.168\.2\.[[1456]]/$vip/g' -} - # Ingress router port ETH address is used for ARP reply/NA in lr_in_ip_input. -AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sed_vips | sort], [0], [dnl +AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sort], [0], [dnl table=3 (lr_in_ip_input ), priority=90 , dnl match=(arp.op == 1 && arp.tpa == 43.43.43.150), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) @@ -1708,28 +1704,28 @@ action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ match=(arp.op == 1 && arp.tpa == 43.43.43.4), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl +match=(inport == "lrp" && arp.op == 1 && arp.tpa == $lr_lb_ip4), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp" && arp.op == 1 && arp.tpa == { $vip, $vip, $vip, $vip }), dnl +match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl match=(inport == "lrp" && ip6.dst == {fe80::200:ff:fe00:1, ff02::1:ff00:1} && nd_ns && nd.target == fe80::200:ff:fe00:1), dnl action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp" && nd_ns && nd.target == { fe80::200:ff:fe00:101:8080, fe80::200:ff:fe00:102:8080 }), dnl +match=(inport == "lrp" && nd_ns && nd.target == $lr_lb_ip6), dnl action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl +match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == $lr_lb_ip4), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == { $vip, $vip, $vip, $vip }), dnl +match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100), dnl action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp-public" && nd_ns && nd.target == { fe80::200:ff:fe00:101:8080, fe80::200:ff:fe00:102:8080 }), dnl +match=(inport == "lrp-public" && nd_ns && nd.target == $lr_lb_ip6), dnl action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ]) @@ -1763,7 +1759,7 @@ action=(xreg0[[0..47]] = 00:00:00:00:01:00; next;) # Ingress router port is used for ARP reply/NA in lr_in_ip_input. # xxreg0[0..47] is used unless external_mac is set. # Priority 90 flows (per router). -AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sed_vips | sort], [0], [dnl +AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sort], [0], [dnl table=3 (lr_in_ip_input ), priority=90 , dnl match=(arp.op == 1 && arp.tpa == 43.43.43.150), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) @@ -1777,28 +1773,28 @@ action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ match=(arp.op == 1 && arp.tpa == 43.43.43.4), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl +match=(inport == "lrp" && arp.op == 1 && arp.tpa == $lr_lb_ip4), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp" && arp.op == 1 && arp.tpa == { $vip, $vip, $vip, $vip }), dnl +match=(inport == "lrp" && arp.op == 1 && arp.tpa == 42.42.42.1 && arp.spa == 42.42.42.0/24), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl match=(inport == "lrp" && ip6.dst == {fe80::200:ff:fe00:1, ff02::1:ff00:1} && nd_ns && nd.target == fe80::200:ff:fe00:1), dnl action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp" && nd_ns && nd.target == { fe80::200:ff:fe00:101:8080, fe80::200:ff:fe00:102:8080 }), dnl +match=(inport == "lrp" && nd_ns && nd.target == $lr_lb_ip6), dnl action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl +match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == $lr_lb_ip4 && is_chassis_resident("cr-lrp-public")), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == { $vip, $vip, $vip, $vip } && is_chassis_resident("cr-lrp-public")), dnl +match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.1 && arp.spa == 43.43.43.0/24), dnl action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; output;) table=3 (lr_in_ip_input ), priority=90 , dnl match=(inport == "lrp-public" && ip6.dst == {fe80::200:ff:fe00:100, ff02::1:ff00:100} && nd_ns && nd.target == fe80::200:ff:fe00:100 && is_chassis_resident("cr-lrp-public")), dnl action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) table=3 (lr_in_ip_input ), priority=90 , dnl -match=(inport == "lrp-public" && nd_ns && nd.target == { fe80::200:ff:fe00:101:8080, fe80::200:ff:fe00:102:8080 } && is_chassis_resident("cr-lrp-public")), dnl +match=(inport == "lrp-public" && nd_ns && nd.target == $lr_lb_ip6 && is_chassis_resident("cr-lrp-public")), dnl action=(nd_na { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) ]) _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
