On Fri, Aug 18, 2023 at 1:58 AM <[email protected]> wrote:
>
> From: Numan Siddique <[email protected]>
>
> A new engine node 'sync_to_sb_pb' is added within 'sync_to_sb'
> node to sync NAT column of Port bindings table. This separation
> is required in order to add load balancer group I-P handling
> in 'northd' engine node (which is handled in the next commit).
>
> 'sync_to_sb_pb' engine node can be later expanded to sync other
> Port binding columns if required.
>
> Signed-off-by: Numan Siddique <[email protected]>
> ---
> northd/en-sync-sb.c | 31 +++++
> northd/en-sync-sb.h | 4 +
> northd/inc-proc-northd.c | 8 +-
> northd/northd.c | 243 +++++++++++++++++++++------------------
> northd/northd.h | 2 +
> 5 files changed, 174 insertions(+), 114 deletions(-)
>
> diff --git a/northd/en-sync-sb.c b/northd/en-sync-sb.c
> index 9832fce30a..552ed56452 100644
> --- a/northd/en-sync-sb.c
> +++ b/northd/en-sync-sb.c
> @@ -254,6 +254,37 @@ sync_to_sb_lb_northd_handler(struct engine_node
*node, void *data OVS_UNUSED)
> return false;
> }
>
> +/* sync_to_sb_pb engine node functions.
> + * This engine node syncs the SB Port Bindings (partly).
> + * en_northd engine create the SB Port binding rows and
> + * updates most of the columns.
> + * This engine node updates the port binding columns which
> + * needs to be updated after northd engine is run.
> + */
> +
> +void *
> +en_sync_to_sb_pb_init(struct engine_node *node OVS_UNUSED,
> + struct engine_arg *arg OVS_UNUSED)
> +{
> + return NULL;
> +}
> +
> +void
> +en_sync_to_sb_pb_run(struct engine_node *node, void *data OVS_UNUSED)
> +{
> + const struct engine_context *eng_ctx = engine_get_context();
> + struct northd_data *northd_data = engine_get_input_data("northd",
node);
> +
> + sync_pbs(eng_ctx->ovnsb_idl_txn, &northd_data->ls_ports);
> + engine_set_node_state(node, EN_UPDATED);
> +}
> +
> +void
> +en_sync_to_sb_pb_cleanup(void *data OVS_UNUSED)
> +{
> +
> +}
> +
> /* static functions. */
> static void
> sync_addr_set(struct ovsdb_idl_txn *ovnsb_txn, const char *name,
> diff --git a/northd/en-sync-sb.h b/northd/en-sync-sb.h
> index 06d2a57710..700d3340e4 100644
> --- a/northd/en-sync-sb.h
> +++ b/northd/en-sync-sb.h
> @@ -22,4 +22,8 @@ void en_sync_to_sb_lb_run(struct engine_node *, void
*data);
> void en_sync_to_sb_lb_cleanup(void *data);
> bool sync_to_sb_lb_northd_handler(struct engine_node *, void *data
OVS_UNUSED);
>
> +void *en_sync_to_sb_pb_init(struct engine_node *, struct engine_arg *);
> +void en_sync_to_sb_pb_run(struct engine_node *, void *data);
> +void en_sync_to_sb_pb_cleanup(void *data);
> +
> #endif /* end of EN_SYNC_SB_H */
> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> index 402c94e88c..dc8b880fd8 100644
> --- a/northd/inc-proc-northd.c
> +++ b/northd/inc-proc-northd.c
> @@ -141,6 +141,7 @@ static ENGINE_NODE(sync_to_sb_addr_set,
"sync_to_sb_addr_set");
> static ENGINE_NODE(fdb_aging, "fdb_aging");
> static ENGINE_NODE(fdb_aging_waker, "fdb_aging_waker");
> static ENGINE_NODE(sync_to_sb_lb, "sync_to_sb_lb");
> +static ENGINE_NODE(sync_to_sb_pb, "sync_to_sb_pb");
> static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data");
>
> void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> @@ -215,13 +216,16 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> sync_to_sb_lb_northd_handler);
> engine_add_input(&en_sync_to_sb_lb, &en_sb_load_balancer, NULL);
>
> + engine_add_input(&en_sync_to_sb_pb, &en_northd, NULL);
NULL handler here always triggers recompute for any en_northd change, which
causes a performance regression for VIF I-P. Before this change, only
tracked LSP changes will have related SB port-binding synced to SB, but now
it iterates through all the ls_ports and resync each of them even if there
is only one LSP in tracked_changes.
I ran the scale test of the simulated ovn-k8s topology of 500 chassis x 50
lsp, for each single VIF creation/deletion the "ovn-northd completion" time
increased from 25ms to 40ms - nearly doubled.
I think a simple handler can be implemented so that only sync tracked LSPs,
and fallback to recompute only if changes are not tracked.
Thanks,
Han
> +
> /* en_sync_to_sb engine node syncs the SB database tables from
> * the NB database tables.
> - * Right now this engine syncs the SB Address_Set table and
> - * SB Load_Balancer table.
> + * Right now this engine syncs the SB Address_Set table,
> + * SB Load_Balancer table and (partly) SB Port_Binding table.
> */
> engine_add_input(&en_sync_to_sb, &en_sync_to_sb_addr_set, NULL);
> engine_add_input(&en_sync_to_sb, &en_sync_to_sb_lb, NULL);
> + engine_add_input(&en_sync_to_sb, &en_sync_to_sb_pb, NULL);
>
> engine_add_input(&en_sync_from_sb, &en_northd,
> sync_from_sb_northd_handler);
> diff --git a/northd/northd.c b/northd/northd.c
> index 1477b79331..a3bd21e0b4 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -3514,8 +3514,6 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn
*ovnsb_txn,
> ds_destroy(&s);
>
> sbrec_port_binding_set_external_ids(op->sb,
&op->nbrp->external_ids);
> -
> - sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
> } else {
> if (!lsp_is_router(op->nbsp)) {
> uint32_t queue_id = smap_get_int(
> @@ -3631,116 +3629,6 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn
*ovnsb_txn,
> } else {
> sbrec_port_binding_set_options(op->sb, NULL);
> }
> - const char *nat_addresses = smap_get(&op->nbsp->options,
> - "nat-addresses");
> - size_t n_nats = 0;
> - char **nats = NULL;
> - bool l3dgw_ports = op->peer && op->peer->od &&
> - op->peer->od->n_l3dgw_ports;
> - if (nat_addresses && !strcmp(nat_addresses, "router")) {
> - if (op->peer && op->peer->od
> - && (chassis || op->peer->od->n_l3dgw_ports)) {
> - bool exclude_lb_vips =
smap_get_bool(&op->nbsp->options,
> - "exclude-lb-vips-from-garp", false);
> - nats = get_nat_addresses(op->peer, &n_nats, false,
> - !exclude_lb_vips);
> - }
> - } else if (nat_addresses && (chassis || l3dgw_ports)) {
> - struct lport_addresses laddrs;
> - if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
> - static struct vlog_rate_limit rl =
> - VLOG_RATE_LIMIT_INIT(1, 1);
> - VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
> - } else {
> - destroy_lport_addresses(&laddrs);
> - n_nats = 1;
> - nats = xcalloc(1, sizeof *nats);
> - struct ds nat_addr = DS_EMPTY_INITIALIZER;
> - ds_put_format(&nat_addr, "%s", nat_addresses);
> - if (l3dgw_ports) {
> - const struct ovn_port *l3dgw_port = (
> - is_l3dgw_port(op->peer)
> - ? op->peer
> - : op->peer->od->l3dgw_ports[0]);
> - ds_put_format(&nat_addr, "
is_chassis_resident(%s)",
> - l3dgw_port->cr_port->json_key);
> - }
> - nats[0] = xstrdup(ds_cstr(&nat_addr));
> - ds_destroy(&nat_addr);
> - }
> - }
> -
> - /* Add the router mac and IPv4 addresses to
> - * Port_Binding.nat_addresses so that GARP is sent for these
> - * IPs by the ovn-controller on which the distributed gateway
> - * router port resides if:
> - *
> - * - op->peer has 'reside-on-redirect-chassis' set and the
> - * the logical router datapath has distributed router
port.
> - *
> - * - op->peer is distributed gateway router port.
> - *
> - * - op->peer's router is a gateway router and op has a
localnet
> - * port.
> - *
> - * Note: Port_Binding.nat_addresses column is also used for
> - * sending the GARPs for the router port IPs.
> - * */
> - bool add_router_port_garp = false;
> - if (op->peer && op->peer->nbrp &&
op->peer->od->n_l3dgw_ports) {
> - if (is_l3dgw_port(op->peer)) {
> - add_router_port_garp = true;
> - } else if (smap_get_bool(&op->peer->nbrp->options,
> - "reside-on-redirect-chassis", false)) {
> - if (op->peer->od->n_l3dgw_ports == 1) {
> - add_router_port_garp = true;
> - } else {
> - static struct vlog_rate_limit rl =
> - VLOG_RATE_LIMIT_INIT(1, 1);
> - VLOG_WARN_RL(&rl,
"\"reside-on-redirect-chassis\" is "
> - "set on logical router port %s,
which "
> - "is on logical router %s, which has
%"
> - PRIuSIZE" distributed gateway
ports. This"
> - "option can only be used when there
is "
> - "a single distributed gateway
port.",
> - op->peer->key,
op->peer->od->nbr->name,
> - op->peer->od->n_l3dgw_ports);
> - }
> - }
> - } else if (chassis && op->od->n_localnet_ports) {
> - add_router_port_garp = true;
> - }
> -
> - if (add_router_port_garp) {
> - struct ds garp_info = DS_EMPTY_INITIALIZER;
> - ds_put_format(&garp_info, "%s",
op->peer->lrp_networks.ea_s);
> -
> - for (size_t i = 0; i <
op->peer->lrp_networks.n_ipv4_addrs;
> - i++) {
> - ds_put_format(&garp_info, " %s",
> -
op->peer->lrp_networks.ipv4_addrs[i].addr_s);
> - }
> -
> - if (op->peer->od->n_l3dgw_ports) {
> - const struct ovn_port *l3dgw_port = (
> - is_l3dgw_port(op->peer)
> - ? op->peer
> - : op->peer->od->l3dgw_ports[0]);
> - ds_put_format(&garp_info, " is_chassis_resident(%s)",
> - l3dgw_port->cr_port->json_key);
> - }
> -
> - n_nats++;
> - nats = xrealloc(nats, (n_nats * sizeof *nats));
> - nats[n_nats - 1] = ds_steal_cstr(&garp_info);
> - ds_destroy(&garp_info);
> - }
> - sbrec_port_binding_set_nat_addresses(op->sb,
> - (const char **) nats,
n_nats);
> - for (size_t i = 0; i < n_nats; i++) {
> - free(nats[i]);
> - }
> - free(nats);
> }
>
> sbrec_port_binding_set_parent_port(op->sb,
op->nbsp->parent_name);
> @@ -4695,6 +4583,137 @@ sync_lbs(struct ovsdb_idl_txn *ovnsb_txn,
> }
> }
>
> +/* Sync the SB Port bindings which needs to be updated.
> + * Presently it syncs the nat column of port bindings corresponding to
> + * the logical switch ports. */
> +void sync_pbs(struct ovsdb_idl_txn *ovnsb_idl_txn, struct hmap *ls_ports)
> +{
> + ovs_assert(ovnsb_idl_txn);
> +
> + struct ovn_port *op;
> + HMAP_FOR_EACH (op, key_node, ls_ports) {
> + if (lsp_is_router(op->nbsp)) {
> + const char *chassis = NULL;
> + if (op->peer && op->peer->od && op->peer->od->nbr) {
> + chassis = smap_get(&op->peer->od->nbr->options,
"chassis");
> + }
> +
> + const char *nat_addresses = smap_get(&op->nbsp->options,
> + "nat-addresses");
> + size_t n_nats = 0;
> + char **nats = NULL;
> + bool l3dgw_ports = op->peer && op->peer->od &&
> + op->peer->od->n_l3dgw_ports;
> + if (nat_addresses && !strcmp(nat_addresses, "router")) {
> + if (op->peer && op->peer->od
> + && (chassis || op->peer->od->n_l3dgw_ports)) {
> + bool exclude_lb_vips =
smap_get_bool(&op->nbsp->options,
> + "exclude-lb-vips-from-garp", false);
> + nats = get_nat_addresses(op->peer, &n_nats, false,
> + !exclude_lb_vips);
> + }
> + } else if (nat_addresses && (chassis || l3dgw_ports)) {
> + struct lport_addresses laddrs;
> + if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
> + static struct vlog_rate_limit rl =
> + VLOG_RATE_LIMIT_INIT(1, 1);
> + VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
> + } else {
> + destroy_lport_addresses(&laddrs);
> + n_nats = 1;
> + nats = xcalloc(1, sizeof *nats);
> + struct ds nat_addr = DS_EMPTY_INITIALIZER;
> + ds_put_format(&nat_addr, "%s", nat_addresses);
> + if (l3dgw_ports) {
> + const struct ovn_port *l3dgw_port = (
> + is_l3dgw_port(op->peer)
> + ? op->peer
> + : op->peer->od->l3dgw_ports[0]);
> + ds_put_format(&nat_addr, "
is_chassis_resident(%s)",
> + l3dgw_port->cr_port->json_key);
> + }
> + nats[0] = xstrdup(ds_cstr(&nat_addr));
> + ds_destroy(&nat_addr);
> + }
> + }
> +
> + /* Add the router mac and IPv4 addresses to
> + * Port_Binding.nat_addresses so that GARP is sent for these
> + * IPs by the ovn-controller on which the distributed gateway
> + * router port resides if:
> + *
> + * - op->peer has 'reside-on-redirect-chassis' set and the
> + * the logical router datapath has distributed router
port.
> + *
> + * - op->peer is distributed gateway router port.
> + *
> + * - op->peer's router is a gateway router and op has a
localnet
> + * port.
> + *
> + * Note: Port_Binding.nat_addresses column is also used for
> + * sending the GARPs for the router port IPs.
> + * */
> + bool add_router_port_garp = false;
> + if (op->peer && op->peer->nbrp &&
op->peer->od->n_l3dgw_ports) {
> + if (is_l3dgw_port(op->peer)) {
> + add_router_port_garp = true;
> + } else if (smap_get_bool(&op->peer->nbrp->options,
> + "reside-on-redirect-chassis", false)) {
> + if (op->peer->od->n_l3dgw_ports == 1) {
> + add_router_port_garp = true;
> + } else {
> + static struct vlog_rate_limit rl =
> + VLOG_RATE_LIMIT_INIT(1, 1);
> + VLOG_WARN_RL(&rl,
"\"reside-on-redirect-chassis\" is "
> + "set on logical router port %s,
which "
> + "is on logical router %s, which has
%"
> + PRIuSIZE" distributed gateway
ports. This"
> + "option can only be used when there
is "
> + "a single distributed gateway
port.",
> + op->peer->key,
op->peer->od->nbr->name,
> + op->peer->od->n_l3dgw_ports);
> + }
> + }
> + } else if (chassis && op->od->n_localnet_ports) {
> + add_router_port_garp = true;
> + }
> +
> + if (add_router_port_garp) {
> + struct ds garp_info = DS_EMPTY_INITIALIZER;
> + ds_put_format(&garp_info, "%s",
op->peer->lrp_networks.ea_s);
> +
> + for (size_t i = 0; i <
op->peer->lrp_networks.n_ipv4_addrs;
> + i++) {
> + ds_put_format(&garp_info, " %s",
> +
op->peer->lrp_networks.ipv4_addrs[i].addr_s);
> + }
> +
> + if (op->peer->od->n_l3dgw_ports) {
> + const struct ovn_port *l3dgw_port = (
> + is_l3dgw_port(op->peer)
> + ? op->peer
> + : op->peer->od->l3dgw_ports[0]);
> + ds_put_format(&garp_info, " is_chassis_resident(%s)",
> + l3dgw_port->cr_port->json_key);
> + }
> +
> + n_nats++;
> + nats = xrealloc(nats, (n_nats * sizeof *nats));
> + nats[n_nats - 1] = ds_steal_cstr(&garp_info);
> + ds_destroy(&garp_info);
> + }
> + sbrec_port_binding_set_nat_addresses(op->sb,
> + (const char **) nats,
n_nats);
> + for (size_t i = 0; i < n_nats; i++) {
> + free(nats[i]);
> + }
> + free(nats);
> + } else {
> + sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
> + }
> + }
> +}
> +
> static bool
> ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
> {
> diff --git a/northd/northd.h b/northd/northd.h
> index 044d4ee0c0..cd2e5394c2 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -374,4 +374,6 @@ const char *northd_get_svc_monitor_mac(void);
> void sync_lbs(struct ovsdb_idl_txn *, const struct
sbrec_load_balancer_table *,
> struct ovn_datapaths *ls_datapaths, struct hmap *lbs);
>
> +void sync_pbs(struct ovsdb_idl_txn *, struct hmap *ls_ports);
> +
> #endif /* NORTHD_H */
> --
> 2.40.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev