> Hi Lorenzo,
> 
> On 9/19/24 19:08, Lorenzo Bianconi wrote:
> > Introduce ECMP_Nexthop table in the SB db in order to track active
> > ecmp-symmetric-reply connections and flush stale ones. ECMP_Nexthop
> > table contains ip and mac address for each active nexthop.
> > 
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> >  northd/en-northd.c       | 27 ++++++++++++++++++++
> >  northd/en-northd.h       |  4 +++
> >  northd/inc-proc-northd.c |  8 +++++-
> >  northd/northd.c          | 53 +++++++++++++++++++++++++++++++++++++++-
> >  northd/northd.h          |  4 +++
> >  ovn-sb.ovsschema         | 17 +++++++++++--
> >  ovn-sb.xml               | 34 ++++++++++++++++++++++++++
> >  tests/ovn-northd.at      | 18 ++++++++------
> >  8 files changed, 154 insertions(+), 11 deletions(-)
> > 
> > diff --git a/northd/en-northd.c b/northd/en-northd.c
> > index 24ed31517..b8227617b 100644
> > --- a/northd/en-northd.c
> > +++ b/northd/en-northd.c
> > @@ -404,6 +404,21 @@ en_bfd_sync_run(struct engine_node *node, void *data)
> >      engine_set_node_state(node, EN_UPDATED);
> >  }
> >  
> > +void
> > +en_ecmp_nexthop_run(struct engine_node *node, void *data OVS_UNUSED)
> > +{
> > +    const struct engine_context *eng_ctx = engine_get_context();
> > +    struct static_routes_data *static_routes_data =
> > +        engine_get_input_data("static_routes", node);
> > +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
> > +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> > +
> > +    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
> > +                             &static_routes_data->parsed_routes,
> > +                             sbrec_ecmp_nexthop_table);
> > +    engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> >  void
> >  *en_northd_init(struct engine_node *node OVS_UNUSED,
> >                  struct engine_arg *arg OVS_UNUSED)
> > @@ -454,6 +469,13 @@ void
> >      return data;
> >  }
> >  
> > +void
> > +*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> 
> Nit 'void *' on the same line.

ack, I will fix it.

> 
> > +                      struct engine_arg *arg OVS_UNUSED)
> > +{
> > +    return NULL;
> > +}
> > +
> >  void
> >  en_northd_cleanup(void *data)
> >  {
> > @@ -526,3 +548,8 @@ en_bfd_sync_cleanup(void *data)
> >  {
> >      bfd_sync_destroy(data);
> >  }
> > +
> > +void
> > +en_ecmp_nexthop_cleanup(void *data OVS_UNUSED)
> > +{
> > +}
> > diff --git a/northd/en-northd.h b/northd/en-northd.h
> > index 631a7c17a..2666cc67e 100644
> > --- a/northd/en-northd.h
> > +++ b/northd/en-northd.h
> > @@ -42,5 +42,9 @@ bool bfd_sync_northd_change_handler(struct engine_node 
> > *node,
> >                                      void *data OVS_UNUSED);
> >  void en_bfd_sync_run(struct engine_node *node, void *data);
> >  void en_bfd_sync_cleanup(void *data OVS_UNUSED);
> > +void en_ecmp_nexthop_run(struct engine_node *node, void *data);
> > +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> > +                           struct engine_arg *arg OVS_UNUSED);
> > +void en_ecmp_nexthop_cleanup(void *data);
> >  
> >  #endif /* EN_NORTHD_H */
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index 1f79916a5..cdc080ef0 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -103,7 +103,8 @@ static unixctl_cb_func chassis_features_list;
> >      SB_NODE(fdb, "fdb") \
> >      SB_NODE(static_mac_binding, "static_mac_binding") \
> >      SB_NODE(chassis_template_var, "chassis_template_var") \
> > -    SB_NODE(logical_dp_group, "logical_dp_group")
> > +    SB_NODE(logical_dp_group, "logical_dp_group") \
> > +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
> >  
> >  enum sb_engine_node {
> >  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> > @@ -162,6 +163,7 @@ static ENGINE_NODE(route_policies, "route_policies");
> >  static ENGINE_NODE(static_routes, "static_routes");
> >  static ENGINE_NODE(bfd, "bfd");
> >  static ENGINE_NODE(bfd_sync, "bfd_sync");
> > +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
> >  
> >  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >                            struct ovsdb_idl_loop *sb)
> > @@ -264,6 +266,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >      engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
> >      engine_add_input(&en_bfd_sync, &en_northd, 
> > bfd_sync_northd_change_handler);
> >  
> > +    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
> > +    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
> > +
> >      engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
> >      engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
> >      engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
> > @@ -334,6 +339,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >      engine_add_input(&en_sync_from_sb, &en_sb_port_binding, NULL);
> >      engine_add_input(&en_sync_from_sb, &en_sb_ha_chassis_group, NULL);
> >  
> > +    engine_add_input(&en_northd_output, &en_ecmp_nexthop, NULL);
> >      engine_add_input(&en_northd_output, &en_sync_from_sb, NULL);
> >      engine_add_input(&en_northd_output, &en_sync_to_sb,
> >                       northd_output_sync_to_sb_handler);
> > diff --git a/northd/northd.c b/northd/northd.c
> > index a267cd5f8..3e3bb84ef 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -10665,6 +10665,55 @@ build_bfd_map(const struct nbrec_bfd_table 
> > *nbrec_bfd_table,
> >      }
> >  }
> >  
> > +void
> > +build_ecmp_nexthop_table(
> > +        struct ovsdb_idl_txn *ovnsb_txn,
> > +        struct hmap *routes,
> > +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
> > +{
> > +    if (!ovnsb_txn) {
> > +        return;
> > +    }
> > +
> > +    struct sset nb_nexthops_sset = SSET_INITIALIZER(&nb_nexthops_sset);
> > +    struct sset sb_nexthops_sset = SSET_INITIALIZER(&sb_nexthops_sset);
> > +
> > +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> > +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> > +                                       sbrec_ecmp_nexthop_table) {
> > +        sset_add(&sb_nexthops_sset, sb_ecmp_nexthop->nexthop);
> > +    }
> 
> This doesn't take into account datapath in any way.  See my comment
> about adding "Datapath_Binding/Port_Binding" references to the
> SB.ECMP_Next_Hop records.

ack, I will fix it.
> 
> > +
> > +    struct parsed_route *pr;
> > +    HMAP_FOR_EACH (pr, key_node, routes) {
> > +        if (!pr->ecmp_symmetric_reply) {
> > +            continue;
> > +        }
> > +
> > +        if (!pr->out_port) {
> > +            continue;
> > +        }
> > +
> > +        const struct nbrec_logical_router_static_route *r = pr->route;
> > +        if (!sset_contains(&sb_nexthops_sset, r->nexthop)) {
> > +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> > +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> > +            sbrec_ecmp_nexthop_set_port(sb_ecmp_nexthop, 
> > pr->out_port->key);
> > +        }
> > +        sset_add(&nb_nexthops_sset, r->nexthop);
> > +    }
> > +
> > +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH_SAFE (sb_ecmp_nexthop,
> > +                                            sbrec_ecmp_nexthop_table) {
> > +        if (!sset_contains(&nb_nexthops_sset, sb_ecmp_nexthop->nexthop)) {
> > +            sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
> > +        }
> > +    }
> > +
> > +    sset_destroy(&nb_nexthops_sset);
> > +    sset_destroy(&sb_nexthops_sset);
> > +}
> > +
> >  /* Returns a string of the IP address of the router port 'op' that
> >   * overlaps with 'ip_s".  If one is not found, returns NULL.
> >   *
> > @@ -11105,10 +11154,11 @@ parsed_routes_add(struct ovn_datapath *od, const 
> > struct hmap *lr_ports,
> >      }
> >  
> >      /* Verify that ip_prefix and nexthop are on the same network. */
> > +    struct ovn_port *out_port = NULL;
> >      if (!is_discard_route &&
> >          !find_static_route_outport(od, lr_ports, route,
> >                                     IN6_IS_ADDR_V4MAPPED(&prefix),
> > -                                   NULL, NULL)) {
> > +                                   NULL, &out_port)) {
> 
> We can now also remove the other invocation of
> find_static_route_outport(), in build_ecmp_route_flow():
> 
> https://github.com/ovn-org/ovn/blob/d276728a8eaddbdc2944225457d22734837b3089/northd/northd.c#L11664-L11668
> 
> I think it's fine to just use route_->out_port there.  It will save us a
> lookup.

I guess we need to store even lrp_addr_s in this case, right?

> 
> >          return;
> >      }
> >  
> > @@ -11151,6 +11201,7 @@ parsed_routes_add(struct ovn_datapath *od, const 
> > struct hmap *lr_ports,
> >      new_pr->hash = route_hash(new_pr);
> >      new_pr->route = route;
> >      new_pr->nbr = od->nbr;
> > +    new_pr->out_port = out_port;
> >      new_pr->ecmp_symmetric_reply = smap_get_bool(&route->options,
> >                                                   "ecmp_symmetric_reply",
> >                                                   false);
> > diff --git a/northd/northd.h b/northd/northd.h
> > index 8f76d642d..c4c2a5d7e 100644
> > --- a/northd/northd.h
> > +++ b/northd/northd.h
> > @@ -703,6 +703,7 @@ struct parsed_route {
> >      uint32_t route_table_id;
> >      uint32_t hash;
> >      const struct nbrec_logical_router_static_route *route;
> > +    struct ovn_port *out_port;
> >      bool ecmp_symmetric_reply;
> >      bool is_discard_route;
> >      const struct nbrec_logical_router *nbr;
> > @@ -745,6 +746,9 @@ void bfd_destroy(struct bfd_data *);
> >  void bfd_sync_init(struct bfd_sync_data *);
> >  void bfd_sync_destroy(struct bfd_sync_data *);
> >  
> > +void build_ecmp_nexthop_table(struct ovsdb_idl_txn *, struct hmap *,
> 
> IMO, raw hmap function arguments should have a name - it's not clear
> what that map actually holds until we look at the implementation of the
> function.

ack, I will fix it

> 
> > +                              const struct sbrec_ecmp_nexthop_table *);
> > +
> >  struct lflow_table;
> >  struct lr_stateful_tracked_data;
> >  struct ls_stateful_tracked_data;
> > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> > index 73abf2c8d..870ee256b 100644
> > --- a/ovn-sb.ovsschema
> > +++ b/ovn-sb.ovsschema
> > @@ -1,7 +1,7 @@
> >  {
> >      "name": "OVN_Southbound",
> > -    "version": "20.37.0",
> > -    "cksum": "1950136776 31493",
> > +    "version": "20.38.0",
> > +    "cksum": "3311058931 32061",
> >      "tables": {
> >          "SB_Global": {
> >              "columns": {
> > @@ -610,6 +610,19 @@
> >                                            "refTable": 
> > "Datapath_Binding"}}}},
> >              "indexes": [["logical_port", "ip"]],
> >              "isRoot": true},
> > +        "ECMP_Nexthop": {
> > +            "columns": {
> > +                "nexthop": {"type": "string"},
> > +                "port": {"type": "string"},
> 
> Is there a reason why this is not a reference to "Port_Binding" instead
> of a plain string?
> 
> Also, we're missing "Datapath_Binding" as a column (what if two
> independent routers have the same next-hop IP?).  However, if we add
> Port_Binding as an explicit reference we can get the datapath
> information from there.

ack, I will fix it

> 
> > +                "mac": {"type": "string"},
> > +                "external_ids": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}},
> > +                "options": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}}},
> > +            "indexes": [["nexthop"]],
> 
> Ditto: this is not enough, multiple routers might use the same next hop.

ack, I will fix it

> 
> > +            "isRoot": true},
> >          "Chassis_Template_Var": {
> >              "columns": {
> >                  "chassis": {"type": "string"},
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index 746cc6308..b5dc34012 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -5178,4 +5178,38 @@ tcp.flags = RST;
> >        The set of variable values for a given chassis.
> >      </column>
> >    </table>
> > +
> > +  <table name="ECMP_Nexthop">
> > +    <p>
> > +      Each record in this table represents an active ECMP route committed 
> > by
> > +      <code>ovn-northd</code> to <code>ovs</code> connection-tracking 
> > table.
> > +      <code>ECMP_Nexthop</code> table is used by 
> > <code>ovn-controller</code>
> > +      to track active ct entries and to flush stale ones.
> > +    </p>
> > +    <column name="nexthop">
> > +      <p>
> > +        Nexthop IP address for this ECMP route. Nexthop IP address should
> > +        be the IP address of a connected router port or the IP address of
> > +        an external device used as nexthop for the given destination.
> > +      </p>
> > +    </column>
> > +    <column name="port">
> > +      <p>
> > +        Logical port used to connect to the configured next-hop.
> > +      </p>
> > +    </column>
> > +    <column name="mac">
> > +      <p>
> > +        Nexthop mac address.
> > +      </p>
> > +    </column>
> > +
> > +    <column name="options">
> > +      Reserved for future use.
> > +    </column>
> > +
> 
> Let's add this table only if needed?  It's kind of confusing to have it
> here.  DB schema column additions are backwards compatible so we don't
> need to already reserve columns for the future.

ack, I will fix it

> 
> > +    <column name="external_ids">
> > +      See <em>External IDs</em> at the beginning of this document.
> > +    </column>
> > +  </table>
> >  </database>
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index dcc3dbbc3..9923243fa 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -3878,7 +3878,7 @@ wait_row_count bfd 1 logical_port=r0-sw3 
> > detect_mult=5 dst_ip=192.168.3.2 \
> >  check_engine_stats northd norecompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +#check_engine_stats northd_output norecompute compute
> 
> I guess this is by accident - it should now be changed to "recompute
> compute" I guess.

ack, I will fix it

> 
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3893,7 +3893,7 @@ wait_row_count bfd 1 logical_port=r0-sw1 min_rx=1000 
> > min_tx=1000 detect_mult=100
> >  check_engine_stats northd norecompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3905,7 +3905,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats static_routes recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3921,7 +3921,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats static_routes recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3933,7 +3933,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats route_policies recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3968,7 +3968,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats static_routes recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -3983,7 +3983,7 @@ check_engine_stats northd recompute nocompute
> >  check_engine_stats bfd recompute nocompute
> >  check_engine_stats route_policies recompute nocompute
> >  check_engine_stats lflow recompute nocompute
> > -check_engine_stats northd_output norecompute compute
> > +check_engine_stats northd_output recompute nocompute
> >  CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> >  
> > @@ -6800,6 +6800,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
> >  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
> >  
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
> > 192.168.0.10
> > +check_row_count ECMP_Nexthop 1
> 
> I'd make this check more explicit and ensure that SB.ECMP_Nexthop fields
> are also set correctly.

ack, I will fix it

Regards,
Lorenzo

> 
> >  
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  
> > @@ -6817,6 +6818,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | 
> > ovn_strip_lflows], [0], [dn
> >  ])
> >  
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
> > 192.168.0.20
> > +check_row_count ECMP_Nexthop 2
> >  
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], 
> > [dnl
> > @@ -6846,6 +6848,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows 
> > | ovn_strip_lflows], [0], [
> >  
> >  # add ecmp route with wrong nexthop
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
> > 192.168.1.20
> > +check_row_count ECMP_Nexthop 2
> >  
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], 
> > [dnl
> > @@ -6865,6 +6868,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | 
> > sed 's/192\.168\.0\..0/192.
> >  
> >  check ovn-nbctl lr-route-del lr0
> >  wait_row_count nb:Logical_Router_Static_Route 0
> > +check_row_count ECMP_Nexthop 0
> >  
> >  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
> >  ovn-sbctl dump-flows lr0 > lr0flows
> 
> Regards,
> Dumitru
> 
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to