On Fri, Sep 1, 2023 at 2:41 AM Han Zhou <[email protected]> wrote:
>
> On Fri, Aug 18, 2023 at 1:58 AM <[email protected]> wrote:
> >
> > From: Numan Siddique <[email protected]>
> >
> > A new engine node 'sync_to_sb_pb' is added within 'sync_to_sb'
> > node to sync NAT column of Port bindings table. This separation
> > is required in order to add load balancer group I-P handling
> > in 'northd' engine node (which is handled in the next commit).
> >
> > 'sync_to_sb_pb' engine node can be later expanded to sync other
> > Port binding columns if required.
> >
> > Signed-off-by: Numan Siddique <[email protected]>
> > ---
> > northd/en-sync-sb.c | 31 +++++
> > northd/en-sync-sb.h | 4 +
> > northd/inc-proc-northd.c | 8 +-
> > northd/northd.c | 243 +++++++++++++++++++++------------------
> > northd/northd.h | 2 +
> > 5 files changed, 174 insertions(+), 114 deletions(-)
> >
> > diff --git a/northd/en-sync-sb.c b/northd/en-sync-sb.c
> > index 9832fce30a..552ed56452 100644
> > --- a/northd/en-sync-sb.c
> > +++ b/northd/en-sync-sb.c
> > @@ -254,6 +254,37 @@ sync_to_sb_lb_northd_handler(struct engine_node
> *node, void *data OVS_UNUSED)
> > return false;
> > }
> >
> > +/* sync_to_sb_pb engine node functions.
> > + * This engine node syncs the SB Port Bindings (partly).
> > + * en_northd engine create the SB Port binding rows and
> > + * updates most of the columns.
> > + * This engine node updates the port binding columns which
> > + * needs to be updated after northd engine is run.
> > + */
> > +
> > +void *
> > +en_sync_to_sb_pb_init(struct engine_node *node OVS_UNUSED,
> > + struct engine_arg *arg OVS_UNUSED)
> > +{
> > + return NULL;
> > +}
> > +
> > +void
> > +en_sync_to_sb_pb_run(struct engine_node *node, void *data OVS_UNUSED)
> > +{
> > + const struct engine_context *eng_ctx = engine_get_context();
> > + struct northd_data *northd_data = engine_get_input_data("northd",
> node);
> > +
> > + sync_pbs(eng_ctx->ovnsb_idl_txn, &northd_data->ls_ports);
> > + engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> > +void
> > +en_sync_to_sb_pb_cleanup(void *data OVS_UNUSED)
> > +{
> > +
> > +}
> > +
> > /* static functions. */
> > static void
> > sync_addr_set(struct ovsdb_idl_txn *ovnsb_txn, const char *name,
> > diff --git a/northd/en-sync-sb.h b/northd/en-sync-sb.h
> > index 06d2a57710..700d3340e4 100644
> > --- a/northd/en-sync-sb.h
> > +++ b/northd/en-sync-sb.h
> > @@ -22,4 +22,8 @@ void en_sync_to_sb_lb_run(struct engine_node *, void
> *data);
> > void en_sync_to_sb_lb_cleanup(void *data);
> > bool sync_to_sb_lb_northd_handler(struct engine_node *, void *data
> OVS_UNUSED);
> >
> > +void *en_sync_to_sb_pb_init(struct engine_node *, struct engine_arg *);
> > +void en_sync_to_sb_pb_run(struct engine_node *, void *data);
> > +void en_sync_to_sb_pb_cleanup(void *data);
> > +
> > #endif /* end of EN_SYNC_SB_H */
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index 402c94e88c..dc8b880fd8 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -141,6 +141,7 @@ static ENGINE_NODE(sync_to_sb_addr_set,
> "sync_to_sb_addr_set");
> > static ENGINE_NODE(fdb_aging, "fdb_aging");
> > static ENGINE_NODE(fdb_aging_waker, "fdb_aging_waker");
> > static ENGINE_NODE(sync_to_sb_lb, "sync_to_sb_lb");
> > +static ENGINE_NODE(sync_to_sb_pb, "sync_to_sb_pb");
> > static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data");
> >
> > void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> > @@ -215,13 +216,16 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> > sync_to_sb_lb_northd_handler);
> > engine_add_input(&en_sync_to_sb_lb, &en_sb_load_balancer, NULL);
> >
> > + engine_add_input(&en_sync_to_sb_pb, &en_northd, NULL);
>
> NULL handler here always triggers recompute for any en_northd change, which
> causes a performance regression for VIF I-P. Before this change, only
> tracked LSP changes will have related SB port-binding synced to SB, but now
> it iterates through all the ls_ports and resync each of them even if there
> is only one LSP in tracked_changes.
>
> I ran the scale test of the simulated ovn-k8s topology of 500 chassis x 50
> lsp, for each single VIF creation/deletion the "ovn-northd completion" time
> increased from 25ms to 40ms - nearly doubled.
>
> I think a simple handler can be implemented so that only sync tracked LSPs,
> and fallback to recompute only if changes are not tracked.
Thanks for the reviews. I've added a handler in v7. Please take a look.
Thanks
Numan
>
> Thanks,
> Han
>
> > +
> > /* en_sync_to_sb engine node syncs the SB database tables from
> > * the NB database tables.
> > - * Right now this engine syncs the SB Address_Set table and
> > - * SB Load_Balancer table.
> > + * Right now this engine syncs the SB Address_Set table,
> > + * SB Load_Balancer table and (partly) SB Port_Binding table.
> > */
> > engine_add_input(&en_sync_to_sb, &en_sync_to_sb_addr_set, NULL);
> > engine_add_input(&en_sync_to_sb, &en_sync_to_sb_lb, NULL);
> > + engine_add_input(&en_sync_to_sb, &en_sync_to_sb_pb, NULL);
> >
> > engine_add_input(&en_sync_from_sb, &en_northd,
> > sync_from_sb_northd_handler);
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 1477b79331..a3bd21e0b4 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -3514,8 +3514,6 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn
> *ovnsb_txn,
> > ds_destroy(&s);
> >
> > sbrec_port_binding_set_external_ids(op->sb,
> &op->nbrp->external_ids);
> > -
> > - sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
> > } else {
> > if (!lsp_is_router(op->nbsp)) {
> > uint32_t queue_id = smap_get_int(
> > @@ -3631,116 +3629,6 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn
> *ovnsb_txn,
> > } else {
> > sbrec_port_binding_set_options(op->sb, NULL);
> > }
> > - const char *nat_addresses = smap_get(&op->nbsp->options,
> > - "nat-addresses");
> > - size_t n_nats = 0;
> > - char **nats = NULL;
> > - bool l3dgw_ports = op->peer && op->peer->od &&
> > - op->peer->od->n_l3dgw_ports;
> > - if (nat_addresses && !strcmp(nat_addresses, "router")) {
> > - if (op->peer && op->peer->od
> > - && (chassis || op->peer->od->n_l3dgw_ports)) {
> > - bool exclude_lb_vips =
> smap_get_bool(&op->nbsp->options,
> > - "exclude-lb-vips-from-garp", false);
> > - nats = get_nat_addresses(op->peer, &n_nats, false,
> > - !exclude_lb_vips);
> > - }
> > - } else if (nat_addresses && (chassis || l3dgw_ports)) {
> > - struct lport_addresses laddrs;
> > - if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
> > - static struct vlog_rate_limit rl =
> > - VLOG_RATE_LIMIT_INIT(1, 1);
> > - VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
> > - } else {
> > - destroy_lport_addresses(&laddrs);
> > - n_nats = 1;
> > - nats = xcalloc(1, sizeof *nats);
> > - struct ds nat_addr = DS_EMPTY_INITIALIZER;
> > - ds_put_format(&nat_addr, "%s", nat_addresses);
> > - if (l3dgw_ports) {
> > - const struct ovn_port *l3dgw_port = (
> > - is_l3dgw_port(op->peer)
> > - ? op->peer
> > - : op->peer->od->l3dgw_ports[0]);
> > - ds_put_format(&nat_addr, "
> is_chassis_resident(%s)",
> > - l3dgw_port->cr_port->json_key);
> > - }
> > - nats[0] = xstrdup(ds_cstr(&nat_addr));
> > - ds_destroy(&nat_addr);
> > - }
> > - }
> > -
> > - /* Add the router mac and IPv4 addresses to
> > - * Port_Binding.nat_addresses so that GARP is sent for these
> > - * IPs by the ovn-controller on which the distributed gateway
> > - * router port resides if:
> > - *
> > - * - op->peer has 'reside-on-redirect-chassis' set and the
> > - * the logical router datapath has distributed router
> port.
> > - *
> > - * - op->peer is distributed gateway router port.
> > - *
> > - * - op->peer's router is a gateway router and op has a
> localnet
> > - * port.
> > - *
> > - * Note: Port_Binding.nat_addresses column is also used for
> > - * sending the GARPs for the router port IPs.
> > - * */
> > - bool add_router_port_garp = false;
> > - if (op->peer && op->peer->nbrp &&
> op->peer->od->n_l3dgw_ports) {
> > - if (is_l3dgw_port(op->peer)) {
> > - add_router_port_garp = true;
> > - } else if (smap_get_bool(&op->peer->nbrp->options,
> > - "reside-on-redirect-chassis", false)) {
> > - if (op->peer->od->n_l3dgw_ports == 1) {
> > - add_router_port_garp = true;
> > - } else {
> > - static struct vlog_rate_limit rl =
> > - VLOG_RATE_LIMIT_INIT(1, 1);
> > - VLOG_WARN_RL(&rl,
> "\"reside-on-redirect-chassis\" is "
> > - "set on logical router port %s,
> which "
> > - "is on logical router %s, which has
> %"
> > - PRIuSIZE" distributed gateway
> ports. This"
> > - "option can only be used when there
> is "
> > - "a single distributed gateway
> port.",
> > - op->peer->key,
> op->peer->od->nbr->name,
> > - op->peer->od->n_l3dgw_ports);
> > - }
> > - }
> > - } else if (chassis && op->od->n_localnet_ports) {
> > - add_router_port_garp = true;
> > - }
> > -
> > - if (add_router_port_garp) {
> > - struct ds garp_info = DS_EMPTY_INITIALIZER;
> > - ds_put_format(&garp_info, "%s",
> op->peer->lrp_networks.ea_s);
> > -
> > - for (size_t i = 0; i <
> op->peer->lrp_networks.n_ipv4_addrs;
> > - i++) {
> > - ds_put_format(&garp_info, " %s",
> > -
> op->peer->lrp_networks.ipv4_addrs[i].addr_s);
> > - }
> > -
> > - if (op->peer->od->n_l3dgw_ports) {
> > - const struct ovn_port *l3dgw_port = (
> > - is_l3dgw_port(op->peer)
> > - ? op->peer
> > - : op->peer->od->l3dgw_ports[0]);
> > - ds_put_format(&garp_info, " is_chassis_resident(%s)",
> > - l3dgw_port->cr_port->json_key);
> > - }
> > -
> > - n_nats++;
> > - nats = xrealloc(nats, (n_nats * sizeof *nats));
> > - nats[n_nats - 1] = ds_steal_cstr(&garp_info);
> > - ds_destroy(&garp_info);
> > - }
> > - sbrec_port_binding_set_nat_addresses(op->sb,
> > - (const char **) nats,
> n_nats);
> > - for (size_t i = 0; i < n_nats; i++) {
> > - free(nats[i]);
> > - }
> > - free(nats);
> > }
> >
> > sbrec_port_binding_set_parent_port(op->sb,
> op->nbsp->parent_name);
> > @@ -4695,6 +4583,137 @@ sync_lbs(struct ovsdb_idl_txn *ovnsb_txn,
> > }
> > }
> >
> > +/* Sync the SB Port bindings which needs to be updated.
> > + * Presently it syncs the nat column of port bindings corresponding to
> > + * the logical switch ports. */
> > +void sync_pbs(struct ovsdb_idl_txn *ovnsb_idl_txn, struct hmap *ls_ports)
> > +{
> > + ovs_assert(ovnsb_idl_txn);
> > +
> > + struct ovn_port *op;
> > + HMAP_FOR_EACH (op, key_node, ls_ports) {
> > + if (lsp_is_router(op->nbsp)) {
> > + const char *chassis = NULL;
> > + if (op->peer && op->peer->od && op->peer->od->nbr) {
> > + chassis = smap_get(&op->peer->od->nbr->options,
> "chassis");
> > + }
> > +
> > + const char *nat_addresses = smap_get(&op->nbsp->options,
> > + "nat-addresses");
> > + size_t n_nats = 0;
> > + char **nats = NULL;
> > + bool l3dgw_ports = op->peer && op->peer->od &&
> > + op->peer->od->n_l3dgw_ports;
> > + if (nat_addresses && !strcmp(nat_addresses, "router")) {
> > + if (op->peer && op->peer->od
> > + && (chassis || op->peer->od->n_l3dgw_ports)) {
> > + bool exclude_lb_vips =
> smap_get_bool(&op->nbsp->options,
> > + "exclude-lb-vips-from-garp", false);
> > + nats = get_nat_addresses(op->peer, &n_nats, false,
> > + !exclude_lb_vips);
> > + }
> > + } else if (nat_addresses && (chassis || l3dgw_ports)) {
> > + struct lport_addresses laddrs;
> > + if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
> > + static struct vlog_rate_limit rl =
> > + VLOG_RATE_LIMIT_INIT(1, 1);
> > + VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
> > + } else {
> > + destroy_lport_addresses(&laddrs);
> > + n_nats = 1;
> > + nats = xcalloc(1, sizeof *nats);
> > + struct ds nat_addr = DS_EMPTY_INITIALIZER;
> > + ds_put_format(&nat_addr, "%s", nat_addresses);
> > + if (l3dgw_ports) {
> > + const struct ovn_port *l3dgw_port = (
> > + is_l3dgw_port(op->peer)
> > + ? op->peer
> > + : op->peer->od->l3dgw_ports[0]);
> > + ds_put_format(&nat_addr, "
> is_chassis_resident(%s)",
> > + l3dgw_port->cr_port->json_key);
> > + }
> > + nats[0] = xstrdup(ds_cstr(&nat_addr));
> > + ds_destroy(&nat_addr);
> > + }
> > + }
> > +
> > + /* Add the router mac and IPv4 addresses to
> > + * Port_Binding.nat_addresses so that GARP is sent for these
> > + * IPs by the ovn-controller on which the distributed gateway
> > + * router port resides if:
> > + *
> > + * - op->peer has 'reside-on-redirect-chassis' set and the
> > + * the logical router datapath has distributed router
> port.
> > + *
> > + * - op->peer is distributed gateway router port.
> > + *
> > + * - op->peer's router is a gateway router and op has a
> localnet
> > + * port.
> > + *
> > + * Note: Port_Binding.nat_addresses column is also used for
> > + * sending the GARPs for the router port IPs.
> > + * */
> > + bool add_router_port_garp = false;
> > + if (op->peer && op->peer->nbrp &&
> op->peer->od->n_l3dgw_ports) {
> > + if (is_l3dgw_port(op->peer)) {
> > + add_router_port_garp = true;
> > + } else if (smap_get_bool(&op->peer->nbrp->options,
> > + "reside-on-redirect-chassis", false)) {
> > + if (op->peer->od->n_l3dgw_ports == 1) {
> > + add_router_port_garp = true;
> > + } else {
> > + static struct vlog_rate_limit rl =
> > + VLOG_RATE_LIMIT_INIT(1, 1);
> > + VLOG_WARN_RL(&rl,
> "\"reside-on-redirect-chassis\" is "
> > + "set on logical router port %s,
> which "
> > + "is on logical router %s, which has
> %"
> > + PRIuSIZE" distributed gateway
> ports. This"
> > + "option can only be used when there
> is "
> > + "a single distributed gateway
> port.",
> > + op->peer->key,
> op->peer->od->nbr->name,
> > + op->peer->od->n_l3dgw_ports);
> > + }
> > + }
> > + } else if (chassis && op->od->n_localnet_ports) {
> > + add_router_port_garp = true;
> > + }
> > +
> > + if (add_router_port_garp) {
> > + struct ds garp_info = DS_EMPTY_INITIALIZER;
> > + ds_put_format(&garp_info, "%s",
> op->peer->lrp_networks.ea_s);
> > +
> > + for (size_t i = 0; i <
> op->peer->lrp_networks.n_ipv4_addrs;
> > + i++) {
> > + ds_put_format(&garp_info, " %s",
> > +
> op->peer->lrp_networks.ipv4_addrs[i].addr_s);
> > + }
> > +
> > + if (op->peer->od->n_l3dgw_ports) {
> > + const struct ovn_port *l3dgw_port = (
> > + is_l3dgw_port(op->peer)
> > + ? op->peer
> > + : op->peer->od->l3dgw_ports[0]);
> > + ds_put_format(&garp_info, " is_chassis_resident(%s)",
> > + l3dgw_port->cr_port->json_key);
> > + }
> > +
> > + n_nats++;
> > + nats = xrealloc(nats, (n_nats * sizeof *nats));
> > + nats[n_nats - 1] = ds_steal_cstr(&garp_info);
> > + ds_destroy(&garp_info);
> > + }
> > + sbrec_port_binding_set_nat_addresses(op->sb,
> > + (const char **) nats,
> n_nats);
> > + for (size_t i = 0; i < n_nats; i++) {
> > + free(nats[i]);
> > + }
> > + free(nats);
> > + } else {
> > + sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
> > + }
> > + }
> > +}
> > +
> > static bool
> > ovn_port_add_tnlid(struct ovn_port *op, uint32_t tunnel_key)
> > {
> > diff --git a/northd/northd.h b/northd/northd.h
> > index 044d4ee0c0..cd2e5394c2 100644
> > --- a/northd/northd.h
> > +++ b/northd/northd.h
> > @@ -374,4 +374,6 @@ const char *northd_get_svc_monitor_mac(void);
> > void sync_lbs(struct ovsdb_idl_txn *, const struct
> sbrec_load_balancer_table *,
> > struct ovn_datapaths *ls_datapaths, struct hmap *lbs);
> >
> > +void sync_pbs(struct ovsdb_idl_txn *, struct hmap *ls_ports);
> > +
> > #endif /* NORTHD_H */
> > --
> > 2.40.1
> >
> > _______________________________________________
> > dev mailing list
> > [email protected]
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev