The northd creates flows that restrict processing ARP requests: If ARP request comes from lswitch that has either localnet or l2gateway ports and router's attachment port is L3 dgw port such ARP will be allowed only on resident chassis.
However, same lswitch could also have 'normal' aka virtual lports. For these lports restrictions described above should not be applied. The fix moves is_chassis_resident check from lrouter to lswitch level. The residence check is only applied for ARP requests that were originated from localnet/l2gateway ports. If such check is succeeded ARP request is allowed otherwise ARP request packet is dropped. For ARP requests coming from virtual ports no residence restrictions are applied. Co-authored-by: Alexandra Rukomoinikova <arukomoinikova@k2.cloud> Signed-off-by: Aleksandr Smirnov <alekssmirnov@k2.cloud> Signed-off-by: Alexandra Rukomoinikova <arukomoinikova@k2.cloud> --- v2: Minor fixes, add co-authored-by v3: Apply Dumitru comments, code rework, move part of processing into new I-P node. --- lib/stopwatch-names.h | 1 + northd/automake.mk | 2 + northd/en-lflow.c | 29 ++++ northd/en-lflow.h | 2 + northd/en-ls-arp.c | 284 +++++++++++++++++++++++++++++++++++++++ northd/en-ls-arp.h | 103 ++++++++++++++ northd/inc-proc-northd.c | 8 ++ northd/northd.c | 178 ++++++++++++++++++++++-- northd/northd.h | 10 ++ northd/ovn-northd.c | 1 + tests/ovn-northd.at | 10 +- tests/ovn.at | 157 ++++++++++++++++++++++ 12 files changed, 769 insertions(+), 16 deletions(-) create mode 100644 northd/en-ls-arp.c create mode 100644 northd/en-ls-arp.h diff --git a/lib/stopwatch-names.h b/lib/stopwatch-names.h index f3a931c40..b912e813c 100644 --- a/lib/stopwatch-names.h +++ b/lib/stopwatch-names.h @@ -34,6 +34,7 @@ #define LR_NAT_RUN_STOPWATCH_NAME "lr_nat_run" #define LR_STATEFUL_RUN_STOPWATCH_NAME "lr_stateful" #define LS_STATEFUL_RUN_STOPWATCH_NAME "ls_stateful" +#define LS_ARP_RUN_STOPWATCH_NAME "ls_arp" #define ADVERTISED_ROUTE_SYNC_RUN_STOPWATCH_NAME "advertised_route_sync" #define LEARNED_ROUTE_SYNC_RUN_STOPWATCH_NAME "learned_route_sync" #define DYNAMIC_ROUTES_RUN_STOPWATCH_NAME "dynamic_routes" diff --git a/northd/automake.mk b/northd/automake.mk index 45ca0337f..8cd4fb3a1 100644 --- a/northd/automake.mk +++ b/northd/automake.mk @@ -44,6 +44,8 @@ northd_ovn_northd_SOURCES = \ northd/en-lr-stateful.h \ northd/en-ls-stateful.c \ northd/en-ls-stateful.h \ + northd/en-ls-arp.c \ + northd/en-ls-arp.h \ northd/en-sampling-app.c \ northd/en-sampling-app.h \ northd/en-acl-ids.c \ diff --git a/northd/en-lflow.c b/northd/en-lflow.c index f903f5e3a..772daf623 100644 --- a/northd/en-lflow.c +++ b/northd/en-lflow.c @@ -23,6 +23,7 @@ #include "en-lr-nat.h" #include "en-lr-stateful.h" #include "en-ls-stateful.h" +#include "en-ls-arp.h" #include "en-multicast.h" #include "en-northd.h" #include "en-meters.h" @@ -60,6 +61,8 @@ lflow_get_input_data(struct engine_node *node, engine_get_input_data("lr_stateful", node); struct ed_type_ls_stateful *ls_stateful_data = engine_get_input_data("ls_stateful", node); + struct ed_type_ls_arp *ls_arp_data = + engine_get_input_data("ls_arp", node); struct multicast_igmp_data *multicat_igmp_data = engine_get_input_data("multicast_igmp", node); struct ic_learned_svc_monitors_data *ic_learned_svc_monitors_data = @@ -84,6 +87,7 @@ lflow_get_input_data(struct engine_node *node, lflow_input->ls_port_groups = &pg_data->ls_port_groups; lflow_input->lr_stateful_table = &lr_stateful_data->table; lflow_input->ls_stateful_table = &ls_stateful_data->table; + lflow_input->ls_arp_table = &ls_arp_data->table; lflow_input->meter_groups = &sync_meters_data->meter_groups; lflow_input->lb_datapaths_map = &northd_data->lb_datapaths_map; lflow_input->local_svc_monitors_map = @@ -215,6 +219,31 @@ lflow_ls_stateful_handler(struct engine_node *node, void *data) return EN_HANDLED_UPDATED; } +enum engine_input_handler_result +lflow_ls_arp_handler(struct engine_node *node, void *data) +{ + struct ed_type_ls_arp *ls_arp_data = + engine_get_input_data("ls_arp", node); + + if (!ls_arp_has_tracked_data(&ls_arp_data->trk_data)) { + return EN_UNHANDLED; + } + + const struct engine_context *eng_ctx = engine_get_context(); + struct lflow_data *lflow_data = data; + struct lflow_input lflow_input; + + lflow_get_input_data(node, &lflow_input); + if (!lflow_handle_ls_arp_changes(eng_ctx->ovnsb_idl_txn, + &ls_arp_data->trk_data, + &lflow_input, + lflow_data->lflow_table)) { + return EN_UNHANDLED; + } + + return EN_HANDLED_UPDATED; +} + enum engine_input_handler_result lflow_multicast_igmp_handler(struct engine_node *node, void *data) { diff --git a/northd/en-lflow.h b/northd/en-lflow.h index 99bcfda15..fab778d03 100644 --- a/northd/en-lflow.h +++ b/northd/en-lflow.h @@ -25,6 +25,8 @@ lflow_lr_stateful_handler(struct engine_node *, void *data); enum engine_input_handler_result lflow_ls_stateful_handler(struct engine_node *node, void *data); enum engine_input_handler_result +lflow_ls_arp_handler(struct engine_node *node, void *data); +enum engine_input_handler_result lflow_multicast_igmp_handler(struct engine_node *node, void *data); enum engine_input_handler_result lflow_group_ecmp_route_change_handler(struct engine_node *node, void *data); diff --git a/northd/en-ls-arp.c b/northd/en-ls-arp.c new file mode 100644 index 000000000..26f2f815e --- /dev/null +++ b/northd/en-ls-arp.c @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2024, Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> + +/* OVS includes */ +#include "include/openvswitch/hmap.h" +#include "openvswitch/util.h" +#include "openvswitch/vlog.h" +#include "stopwatch.h" + +/* OVN includes */ +#include "en-lr-nat.h" +#include "en-ls-arp.h" +#include "lib/inc-proc-eng.h" +#include "lib/ovn-nb-idl.h" +#include "lib/ovn-sb-idl.h" +#include "lib/ovn-util.h" +#include "lib/stopwatch-names.h" +#include "lflow-mgr.h" +#include "northd.h" + +VLOG_DEFINE_THIS_MODULE(en_ls_arp); + +/* static functions. */ + +struct ls_arp_input { + const struct ovn_datapaths *ls_datapaths; + const struct hmap *lr_nats; +}; + +static struct ls_arp_input +ls_arp_get_input_data(struct engine_node *node) +{ + const struct northd_data *northd_data = + engine_get_input_data("northd", node); + struct ed_type_lr_nat_data *lr_nat_data = + engine_get_input_data("lr_nat", node); + + return (struct ls_arp_input) { + .ls_datapaths = &northd_data->ls_datapaths, + .lr_nats = &lr_nat_data->lr_nats.entries, + }; +} + +static void +ls_arp_table_clear(struct ls_arp_table *table) +{ + struct ls_arp_record *ar; + HMAP_FOR_EACH_POP (ar, key_node, &table->entries) { + lflow_ref_destroy(ar->lflow_ref); + hmapx_destroy(&ar->nat_records); + free(ar); + } +} + +/* + * Return hmapx (odmap) of datapaths, assumed lswitches, + * that are gateways for given nat. + */ +static void +nat_odmap_create(struct lr_nat_record *nr, + struct hmapx *odmap) +{ + for (size_t i = 0; i < nr->n_nat_entries; i++) { + struct ovn_nat *ent = &nr->nat_entries[i]; + + if (ent->is_valid + && ent->l3dgw_port + && ent->l3dgw_port->peer + && ent->l3dgw_port->peer->od + && !ent->is_distributed) { + hmapx_add(odmap, ent->l3dgw_port->peer->od); + } + } +} + +static bool +ods_find_by_index(struct hmapx *odmap, size_t index) +{ + struct hmapx_node *hmapx_node; + HMAPX_FOR_EACH (hmapx_node, odmap) { + struct ovn_datapath *od = hmapx_node->data; + + if (od->index == index) { + return true; + } + } + + return false; +} + +static struct ls_arp_record* +ars_find_by_index(const struct ls_arp_table *table, size_t index) +{ + struct ls_arp_record *ar; + HMAP_FOR_EACH (ar, key_node, &table->entries) { + if (ar->ls_index == index) { + return ar; + } + } + + return NULL; +} + +/* + * Fill nat_records in given ls_arp_record with nat records that have + * lswitch, owned by arp record, as nat gateway. + */ +static void +ls_arp_record_set_nats(struct ls_arp_record *ar, + const struct hmap *nats) +{ + hmapx_init(&ar->nat_records); + + struct lr_nat_record *nr; + HMAP_FOR_EACH (nr, key_node, nats) { + struct hmapx ods = HMAPX_INITIALIZER(&ods); + + nat_odmap_create(nr, &ods); + + if (ods_find_by_index(&ods, ar->ls_index)) { + hmapx_add(&ar->nat_records, nr); + } + + hmapx_destroy(&ods); + } +} + +static struct ls_arp_record * +ls_arp_record_create(struct ls_arp_table *table, + const struct ovn_datapath *od, + const struct hmap *lr_nats) +{ + struct ls_arp_record *ar = xzalloc(sizeof *ar); + + ar->ls_index = od->index; + ls_arp_record_set_nats(ar, lr_nats); + ar->lflow_ref = lflow_ref_create(); + + hmap_insert(&table->entries, &ar->key_node, + uuid_hash(&od->nbs->header_.uuid)); + + return ar; +} + +/* public functions. */ +void* +en_ls_arp_init(struct engine_node *node OVS_UNUSED, + struct engine_arg *arg OVS_UNUSED) +{ + struct ed_type_ls_arp *data = xzalloc(sizeof *data); + + hmap_init(&data->table.entries); + hmapx_init(&data->trk_data.crupdated); + + return data; +} + +void +en_ls_arp_cleanup(void *data_) +{ + struct ed_type_ls_arp *data = data_; + + ls_arp_table_clear(&data->table); + hmap_destroy(&data->table.entries); + hmapx_destroy(&data->trk_data.crupdated); +} + +void +en_ls_arp_clear_tracked_data(void *data_) +{ + struct ed_type_ls_arp *data = data_; + hmapx_clear(&data->trk_data.crupdated); +} + +enum engine_node_state +en_ls_arp_run(struct engine_node *node, void *data_) +{ + struct ls_arp_input input_data = ls_arp_get_input_data(node); + struct ed_type_ls_arp *data = data_; + + stopwatch_start(LS_ARP_RUN_STOPWATCH_NAME, time_msec()); + + ls_arp_table_clear(&data->table); + + const struct ovn_datapath *od; + HMAP_FOR_EACH (od, key_node, &input_data.ls_datapaths->datapaths) { + ls_arp_record_create(&data->table, od, input_data.lr_nats); + } + + stopwatch_stop(LS_ARP_RUN_STOPWATCH_NAME, time_msec()); + + return EN_UPDATED; +} + +/* Handler functions. */ + +enum engine_input_handler_result +ls_arp_northd_handler(struct engine_node *node, void *x OVS_UNUSED) +{ + struct northd_data *northd_data = engine_get_input_data("northd", node); + if (!northd_has_tracked_data(&northd_data->trk_data)) { + return EN_UNHANDLED; + } + + return EN_HANDLED_UNCHANGED; +} + +enum engine_input_handler_result +ls_arp_lr_nat_handler(struct engine_node *node, void *data_) +{ + struct ed_type_lr_nat_data *lr_nat_data = + engine_get_input_data("lr_nat", node); + + if (!lr_nat_has_tracked_data(&lr_nat_data->trk_data)) { + return EN_UNHANDLED; + } + + struct ed_type_ls_arp *data = data_; + + struct hmapx_node *hmapx_node; + HMAPX_FOR_EACH (hmapx_node, &lr_nat_data->trk_data.crupdated) { + struct lr_nat_record *nr_cur = hmapx_node->data; + struct hmapx ods = HMAPX_INITIALIZER(&ods); + + /* Collect all ods (lswitch) that are gateways for given nat */ + nat_odmap_create(nr_cur, &ods); + + struct ls_arp_record *ar; + LS_ARP_TABLE_FOR_EACH (ar, &data->table) { + struct hmapx_node *nr_node = hmapx_find(&ar->nat_records, nr_cur); + + /* current nat record is already registered for given arp_record */ + if (nr_node) { + /* trigger this arp_record to regenerate od lflow */ + hmapx_add(&data->trk_data.crupdated, ar); + + /* ... but not part of affected ods anymore, + i.e. the change of the nat removes this gateway */ + if (!ods_find_by_index(&ods, ar->ls_index)) { + hmapx_delete(&ar->nat_records, nr_node); + } + } + } + + /* Process gateways that are new */ + struct hmapx_node *hmapx_node2; + HMAPX_FOR_EACH (hmapx_node2, &ods) { + struct ovn_datapath *od = hmapx_node2->data; + + /* Determine which arp_record is affected */ + ar = ars_find_by_index(&data->table, od->index); + ovs_assert(ar); + + /* new gateway triggers lflow regeneration for this arp_records */ + hmapx_add(&data->trk_data.crupdated, ar); + hmapx_add(&ar->nat_records, nr_cur); + } + + hmapx_destroy(&ods); + } + + + if (ls_arp_has_tracked_data(&data->trk_data)) { + return EN_HANDLED_UPDATED; + } + + return EN_HANDLED_UNCHANGED; +} + diff --git a/northd/en-ls-arp.h b/northd/en-ls-arp.h new file mode 100644 index 000000000..313ec0db0 --- /dev/null +++ b/northd/en-ls-arp.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2024, Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef EN_LS_ARP_H +#define EN_LS_ARP_H 1 + +/* OVS includes. */ +#include "lib/hmapx.h" +#include "openvswitch/hmap.h" + +/* OVN includes. */ +#include "lib/inc-proc-eng.h" +#include "lib/ovn-nb-idl.h" +#include "lib/ovn-sb-idl.h" +#include "lib/ovn-util.h" +#include "lib/stopwatch-names.h" + +struct lflow_ref; + +struct ls_arp_record { + struct hmap_node key_node; + + /* Unique id of the logical switch. Note : This id is assigned + * by the northd engine node for each logical switch. */ + size_t ls_index; + + /* 'lflow_ref' is used to reference logical flows generated for + * this ls_arp record. + * + * This data is initialized and destroyed by the en_ls_arp node, + * but populated and used only by the en_lflow node. Ideally this data + * should be maintained as part of en_lflow's data. However, it would + * be less efficient and more complex: + * + * 1. It would require an extra search (using the index) to find the + * lflows. + * + * 2. Building the index needs to be thread-safe, using either a global + * lock which is obviously less efficient, or hash-based lock array which + * is more complex. + * + * Adding the lflow_ref here is more straightforward. The drawback is that + * we need to keep in mind that this data belongs to en_lflow node, so + * never access it from any other nodes. + * + * Note: lflow_ref is not thread safe. Only one thread should + * access ls_arp_record->lflow_ref at any given time. + */ + struct lflow_ref *lflow_ref; + + /* lr_nat_record ptrs that trigger this od to rebuild lflow */ + struct hmapx nat_records; +}; + +struct ls_arp_table { + struct hmap entries; +}; + +#define LS_ARP_TABLE_FOR_EACH(LS_ARP_REC, TABLE) \ + HMAP_FOR_EACH (LS_ARP_REC, key_node, &(TABLE)->entries) + +#define LS_ARP_TABLE_FOR_EACH_IN_P(LS_ARP_REC, JOBID, TABLE) \ + HMAP_FOR_EACH_IN_PARALLEL (LS_ARP_REC, key_node, JOBID, \ + &(TABLE)->entries) + +struct ls_arp_tracked_data { + struct hmapx crupdated; /* Stores 'struct ls_arp_record'. */ +}; + +struct ed_type_ls_arp { + struct ls_arp_table table; + struct ls_arp_tracked_data trk_data; +}; + +void *en_ls_arp_init(struct engine_node*, struct engine_arg*); +void en_ls_arp_cleanup(void* data); +void en_ls_arp_clear_tracked_data(void* data); +enum engine_node_state en_ls_arp_run(struct engine_node*, void* data); + +enum engine_input_handler_result +ls_arp_lr_nat_handler(struct engine_node *node, void *data); +enum engine_input_handler_result +ls_arp_northd_handler(struct engine_node *node, void *data); + +static inline bool +ls_arp_has_tracked_data(struct ls_arp_tracked_data *trk_data) { + return !hmapx_is_empty(&trk_data->crupdated); +} + +#endif /* EN_LS_ARP_H */ + diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c index bfb893a1a..8068123f0 100644 --- a/northd/inc-proc-northd.c +++ b/northd/inc-proc-northd.c @@ -34,6 +34,7 @@ #include "en-lr-stateful.h" #include "en-lr-nat.h" #include "en-ls-stateful.h" +#include "en-ls-arp.h" #include "en-multicast.h" #include "en-northd.h" #include "en-lflow.h" @@ -171,6 +172,7 @@ static ENGINE_NODE(lb_data, CLEAR_TRACKED_DATA); static ENGINE_NODE(lr_nat, CLEAR_TRACKED_DATA); static ENGINE_NODE(lr_stateful, CLEAR_TRACKED_DATA); static ENGINE_NODE(ls_stateful, CLEAR_TRACKED_DATA); +static ENGINE_NODE(ls_arp, CLEAR_TRACKED_DATA); static ENGINE_NODE(route_policies); static ENGINE_NODE(routes); static ENGINE_NODE(bfd); @@ -296,6 +298,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, ls_stateful_port_group_handler); engine_add_input(&en_ls_stateful, &en_nb_acl, ls_stateful_acl_handler); + engine_add_input(&en_ls_arp, &en_lr_nat, ls_arp_lr_nat_handler); + engine_add_input(&en_ls_arp, &en_northd, ls_arp_northd_handler); + engine_add_input(&en_mac_binding_aging, &en_sb_mac_binding, NULL); engine_add_input(&en_mac_binding_aging, &en_northd, NULL); engine_add_input(&en_mac_binding_aging, &en_mac_binding_aging_waker, NULL); @@ -390,6 +395,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, engine_add_input(&en_lflow, &en_port_group, engine_noop_handler); engine_add_input(&en_lflow, &en_lr_stateful, lflow_lr_stateful_handler); engine_add_input(&en_lflow, &en_ls_stateful, lflow_ls_stateful_handler); + + engine_add_input(&en_lflow, &en_ls_arp, lflow_ls_arp_handler); + engine_add_input(&en_lflow, &en_multicast_igmp, lflow_multicast_igmp_handler); engine_add_input(&en_lflow, &en_sb_acl_id, NULL); diff --git a/northd/northd.c b/northd/northd.c index e0a329a17..e8791b63d 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -50,6 +50,7 @@ #include "en-lr-nat.h" #include "en-lr-stateful.h" #include "en-ls-stateful.h" +#include "en-ls-arp.h" #include "en-multicast.h" #include "en-sampling-app.h" #include "en-datapath-logical-switch.h" @@ -135,6 +136,7 @@ static bool vxlan_mode; #define REGBIT_IP_FRAG "reg0[19]" #define REGBIT_ACL_PERSIST_ID "reg0[20]" #define REGBIT_ACL_HINT_ALLOW_PERSISTED "reg0[21]" +#define REGBIT_EXT_ARP "reg0[22]" /* Register definitions for switches and routers. */ @@ -532,6 +534,7 @@ ovn_datapath_create(struct hmap *datapaths, const struct uuid *key, hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key)); od->lr_group = NULL; hmap_init(&od->ports); + hmapx_init(&od->ph_ports); sset_init(&od->router_ips); od->ls_peers = VECTOR_EMPTY_INITIALIZER(struct ovn_datapath *); od->router_ports = VECTOR_EMPTY_INITIALIZER(struct ovn_port *); @@ -569,6 +572,7 @@ ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od) vector_destroy(&od->l3dgw_ports); destroy_mcast_info_for_datapath(od); destroy_ports_for_datapath(od); + hmapx_destroy(&od->ph_ports); sset_destroy(&od->router_ips); free(od); } @@ -1177,6 +1181,12 @@ lsp_is_vtep(const struct nbrec_logical_switch_port *nbsp) return !strcmp(nbsp->type, "vtep"); } +static bool +lsp_is_l2gw(const struct nbrec_logical_switch_port *nbsp) +{ + return !strcmp(nbsp->type, "l2gateway"); +} + static bool localnet_can_learn_mac(const struct nbrec_logical_switch_port *nbsp) { @@ -1563,6 +1573,10 @@ join_logical_ports_lsp(struct hmap *ports, od->has_vtep_lports = true; } + if (lsp_is_localnet(nbsp) || lsp_is_l2gw(nbsp)) { + hmapx_add(&od->ph_ports, op); + } + parse_lsp_addrs(op); op->od = od; @@ -1956,6 +1970,15 @@ join_logical_ports(const struct sbrec_port_binding_table *sbrec_pb_table, } } +static bool +is_nat_distributed(const struct nbrec_nat *nat, + const struct ovn_datapath *od) +{ + return !vector_is_empty(&od->l3dgw_ports) + && nat->logical_port && nat->external_mac + && !strcmp(nat->type, "dnat_and_snat"); +} + /* Returns an array of strings, each consisting of a MAC address followed * by one or more IP addresses, and if the port is a distributed gateway * port, followed by 'is_chassis_resident("LPORT_NAME")', where the @@ -2014,9 +2037,7 @@ get_nat_addresses(const struct ovn_port *op, size_t *n, bool routable_only, /* Determine whether this NAT rule satisfies the conditions for * distributed NAT processing. */ - if (!vector_is_empty(&op->od->l3dgw_ports) && - !strcmp(nat->type, "dnat_and_snat") && - nat->logical_port && nat->external_mac) { + if (is_nat_distributed(nat, op->od)) { /* Distributed NAT rule. */ if (eth_addr_from_string(nat->external_mac, &mac)) { struct ds address = DS_EMPTY_INITIALIZER; @@ -9210,6 +9231,90 @@ build_drop_arp_nd_flows_for_unbound_router_ports(struct ovn_port *op, ds_destroy(&match); } +/* + * Create ARP filtering flow for od, assumed logical switch, + * for the following condition: + * Given lswitch has either localnet or l2gateway ports and + * router connection ports that requires chassis residence. + * ARP requests coming from localnet/l2gateway ports + * allowed for processing on resident chassis only. + */ +static void +build_lswitch_arp_chassis_resident(const struct ovn_datapath *od, + struct lflow_table *lflows, + const struct ls_arp_record *ar) +{ + struct sset port_check; + struct sset resident_check; + + sset_init(&port_check); + sset_init(&resident_check); + + struct ds match = DS_EMPTY_INITIALIZER; + + struct hmapx_node *node; + HMAPX_FOR_EACH (node, &od->ph_ports) { + struct ovn_port *op = node->data; + + ds_clear(&match); + ds_put_format(&match, "(arp.op == 1 || arp.op == 2) && inport == %s", + op->json_key); + sset_add(&port_check, ds_cstr(&match)); + } + + struct ovn_port *op; + VECTOR_FOR_EACH (&od->router_ports, op) { + struct ovn_port *op_r = op->peer; + + if (lrp_is_l3dgw(op_r)) { + ds_clear(&match); + ds_put_format(&match, + REGBIT_EXT_ARP" == 1 && is_chassis_resident(%s)", + op_r->cr_port->json_key); + sset_add(&resident_check, ds_cstr(&match)); + } + } + + struct hmapx_node *hmapx_node; + HMAPX_FOR_EACH (hmapx_node, &ar->nat_records) { + struct lr_nat_record *nr = hmapx_node->data; + + for (size_t i = 0; i < nr->n_nat_entries; i++) { + struct ovn_nat *ent = &nr->nat_entries[i]; + if (ent->is_valid && ent->is_distributed) { + ds_clear(&match); + ds_put_format(&match, + REGBIT_EXT_ARP + " == 1 && is_chassis_resident(\"%s\")", + ent->nb->logical_port); + sset_add(&resident_check, ds_cstr(&match)); + } + } + } + + if (!sset_is_empty(&port_check) && !sset_is_empty(&resident_check)) { + const char *item; + + SSET_FOR_EACH(item, &port_check) { + ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 75, + item, REGBIT_EXT_ARP" = 1; next;", + ar->lflow_ref); + } + + SSET_FOR_EACH(item, &resident_check) { + ovn_lflow_add(lflows, od, S_SWITCH_IN_APPLY_PORT_SEC, 75, + item, "next;", ar->lflow_ref); + } + + ovn_lflow_add(lflows, od, S_SWITCH_IN_APPLY_PORT_SEC, 70, + REGBIT_EXT_ARP" == 1", "drop;", ar->lflow_ref); + } + + ds_destroy(&match); + sset_destroy(&port_check); + sset_destroy(&resident_check); +} + static bool is_vlan_transparent(const struct ovn_datapath *od) { @@ -13594,10 +13699,6 @@ build_neigh_learning_flows_for_lrouter_port( op->lrp_networks.ipv4_addrs[i].network_s, op->lrp_networks.ipv4_addrs[i].plen, op->lrp_networks.ipv4_addrs[i].addr_s); - if (lrp_is_l3dgw(op)) { - ds_put_format(match, " && is_chassis_resident(%s)", - op->cr_port->json_key); - } const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT " = lookup_arp(inport, arp.spa, arp.sha); " REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;" @@ -13614,10 +13715,6 @@ build_neigh_learning_flows_for_lrouter_port( op->json_key, op->lrp_networks.ipv4_addrs[i].network_s, op->lrp_networks.ipv4_addrs[i].plen); - if (lrp_is_l3dgw(op)) { - ds_put_format(match, " && is_chassis_resident(%s)", - op->cr_port->json_key); - } ds_clear(actions); ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT " = lookup_arp(inport, arp.spa, arp.sha); %snext;", @@ -17532,6 +17629,7 @@ struct lswitch_flow_build_info { const struct ls_port_group_table *ls_port_groups; const struct lr_stateful_table *lr_stateful_table; const struct ls_stateful_table *ls_stateful_table; + const struct ls_arp_table *ls_arp_table; struct lflow_table *lflows; const struct shash *meter_groups; const struct hmap *lb_dps_map; @@ -17714,6 +17812,7 @@ build_lflows_thread(void *arg) struct worker_control *control = (struct worker_control *) arg; const struct lr_stateful_record *lr_stateful_rec; const struct ls_stateful_record *ls_stateful_rec; + const struct ls_arp_record *ls_arp_rec; struct lswitch_flow_build_info *lsi; struct ovn_lb_datapaths *lb_dps; struct ovn_datapath *od; @@ -17870,6 +17969,18 @@ build_lflows_thread(void *arg) } } + for (bnum = control->id; + bnum <= lsi->ls_arp_table->entries.mask; + bnum += control->pool->size) + { + LS_ARP_TABLE_FOR_EACH_IN_P (ls_arp_rec, bnum, + lsi->ls_arp_table) { + od = ovn_datapaths_find_by_index( + lsi->ls_datapaths, ls_arp_rec->ls_index); + build_lswitch_arp_chassis_resident(od, lsi->lflows, ls_arp_rec); + } + } + lsi->thread_lflow_counter = thread_lflow_counter; } post_completed_work(control); @@ -17918,6 +18029,7 @@ build_lswitch_and_lrouter_flows( const struct ls_port_group_table *ls_pgs, const struct lr_stateful_table *lr_stateful_table, const struct ls_stateful_table *ls_stateful_table, + const struct ls_arp_table *ls_arp_table, struct lflow_table *lflows, const struct shash *meter_groups, const struct hmap *lb_dps_map, @@ -17954,6 +18066,7 @@ build_lswitch_and_lrouter_flows( lsiv[index].ls_port_groups = ls_pgs; lsiv[index].lr_stateful_table = lr_stateful_table; lsiv[index].ls_stateful_table = ls_stateful_table; + lsiv[index].ls_arp_table = ls_arp_table; lsiv[index].meter_groups = meter_groups; lsiv[index].lb_dps_map = lb_dps_map; lsiv[index].local_svc_monitor_map = @@ -17988,6 +18101,7 @@ build_lswitch_and_lrouter_flows( } else { const struct lr_stateful_record *lr_stateful_rec; const struct ls_stateful_record *ls_stateful_rec; + const struct ls_arp_record *ls_arp_rec; struct ovn_lb_datapaths *lb_dps; struct ovn_datapath *od; struct ovn_port *op; @@ -18000,6 +18114,7 @@ build_lswitch_and_lrouter_flows( .ls_port_groups = ls_pgs, .lr_stateful_table = lr_stateful_table, .ls_stateful_table = ls_stateful_table, + .ls_arp_table = ls_arp_table, .lflows = lflows, .meter_groups = meter_groups, .lb_dps_map = lb_dps_map, @@ -18095,6 +18210,12 @@ build_lswitch_and_lrouter_flows( lsi.sbrec_acl_id_table); } + LS_ARP_TABLE_FOR_EACH (ls_arp_rec, ls_arp_table) { + od = ovn_datapaths_find_by_index(lsi.ls_datapaths, + ls_arp_rec->ls_index); + build_lswitch_arp_chassis_resident(od, lsi.lflows, ls_arp_rec); + } + ds_destroy(&lsi.match); ds_destroy(&lsi.actions); } @@ -18178,6 +18299,7 @@ void build_lflows(struct ovsdb_idl_txn *ovnsb_txn, input_data->ls_port_groups, input_data->lr_stateful_table, input_data->ls_stateful_table, + input_data->ls_arp_table, lflows, input_data->meter_groups, input_data->lb_datapaths_map, @@ -18603,6 +18725,40 @@ lflow_handle_ls_stateful_changes(struct ovsdb_idl_txn *ovnsb_txn, return true; } +bool +lflow_handle_ls_arp_changes(struct ovsdb_idl_txn *ovnsb_txn, + struct ls_arp_tracked_data *trk_data, + struct lflow_input *lflow_input, + struct lflow_table *lflows) +{ + struct hmapx_node *hmapx_node; + + HMAPX_FOR_EACH (hmapx_node, &trk_data->crupdated) { + const struct ls_arp_record *ar = hmapx_node->data; + const struct ovn_datapath *od = + ovn_datapaths_find_by_index(lflow_input->ls_datapaths, + ar->ls_index); + lflow_ref_unlink_lflows(ar->lflow_ref); + + /* Generate new lflows. */ + build_lswitch_arp_chassis_resident(od, lflows, ar); + + /* Sync the new flows to SB. */ + bool handled = lflow_ref_sync_lflows( + ar->lflow_ref, lflows, ovnsb_txn, + lflow_input->ls_datapaths, + lflow_input->lr_datapaths, + lflow_input->ovn_internal_version_changed, + lflow_input->sbrec_logical_flow_table, + lflow_input->sbrec_logical_dp_group_table); + if (!handled) { + return false; + } + } + + return true; +} + static bool mirror_needs_update(const struct nbrec_mirror *nb_mirror, const struct sbrec_mirror *sb_mirror) diff --git a/northd/northd.h b/northd/northd.h index 8f865e8b3..d75883612 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -20,12 +20,14 @@ #include "lib/ovn-util.h" #include "lib/ovs-atomic.h" #include "lib/sset.h" +#include "lib/hmapx.h" #include "northd/en-port-group.h" #include "northd/ipam.h" #include "openvswitch/hmap.h" #include "simap.h" #include "ovs-thread.h" #include "en-lr-stateful.h" +#include "en-ls-arp.h" #include "vec.h" #include "datapath-sync.h" @@ -253,6 +255,7 @@ struct lflow_input { const struct ls_port_group_table *ls_port_groups; const struct lr_stateful_table *lr_stateful_table; const struct ls_stateful_table *ls_stateful_table; + const struct ls_arp_table *ls_arp_table; const struct shash *meter_groups; const struct hmap *lb_datapaths_map; const struct sset *bfd_ports; @@ -445,6 +448,9 @@ struct ovn_datapath { * Valid only if it is logical router datapath. NULL otherwise. */ struct lrouter_group *lr_group; + /* Set of localnet or l2gw ports. */ + struct hmapx ph_ports; + /* Map of ovn_port objects belonging to this datapath. * This map doesn't include derived ports. */ struct hmap ports; @@ -922,6 +928,10 @@ bool lflow_handle_ls_stateful_changes(struct ovsdb_idl_txn *, struct ls_stateful_tracked_data *, struct lflow_input *, struct lflow_table *lflows); +bool lflow_handle_ls_arp_changes(struct ovsdb_idl_txn *, + struct ls_arp_tracked_data *, + struct lflow_input *, + struct lflow_table *lflows); bool northd_handle_sb_port_binding_changes( const struct sbrec_port_binding_table *, struct hmap *ls_ports, struct hmap *lr_ports); diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 62da4a3aa..ab9339369 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -989,6 +989,7 @@ main(int argc, char *argv[]) stopwatch_create(LR_NAT_RUN_STOPWATCH_NAME, SW_MS); stopwatch_create(LR_STATEFUL_RUN_STOPWATCH_NAME, SW_MS); stopwatch_create(LS_STATEFUL_RUN_STOPWATCH_NAME, SW_MS); + stopwatch_create(LS_ARP_RUN_STOPWATCH_NAME, SW_MS); stopwatch_create(ADVERTISED_ROUTE_SYNC_RUN_STOPWATCH_NAME, SW_MS); stopwatch_create(LEARNED_ROUTE_SYNC_RUN_STOPWATCH_NAME, SW_MS); stopwatch_create(DYNAMIC_ROUTES_RUN_STOPWATCH_NAME, SW_MS); diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index b2b9f092c..57372a1e6 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -7274,10 +7274,10 @@ AT_CHECK([grep lr_in_admission lrflows | grep cr-DR | ovn_strip_lflows], [0], [d table=??(lr_in_admission ), priority=50 , match=(eth.dst == 04:ac:10:01:00:01 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")), action=(xreg0[[0..47]] = 04:ac:10:01:00:01; next;) ]) # Check the flows in lr_in_lookup_neighbor stage -AT_CHECK([grep lr_in_lookup_neighbor lrflows | grep cr-DR | ovn_strip_lflows], [0], [dnl - table=??(lr_in_lookup_neighbor), priority=100 , match=(inport == "DR-S1" && arp.spa == 172.16.1.0/24 && arp.op == 1 && is_chassis_resident("cr-DR-S1")), action=(reg9[[2]] = lookup_arp(inport, arp.spa, arp.sha); next;) - table=??(lr_in_lookup_neighbor), priority=100 , match=(inport == "DR-S2" && arp.spa == 172.16.2.0/24 && arp.op == 1 && is_chassis_resident("cr-DR-S2")), action=(reg9[[2]] = lookup_arp(inport, arp.spa, arp.sha); next;) - table=??(lr_in_lookup_neighbor), priority=100 , match=(inport == "DR-S3" && arp.spa == 172.16.3.0/24 && arp.op == 1 && is_chassis_resident("cr-DR-S3")), action=(reg9[[2]] = lookup_arp(inport, arp.spa, arp.sha); next;) +AT_CHECK([grep lr_in_lookup_neighbor lrflows | grep DR-S. | ovn_strip_lflows], [0], [dnl + table=??(lr_in_lookup_neighbor), priority=100 , match=(inport == "DR-S1" && arp.spa == 172.16.1.0/24 && arp.op == 1), action=(reg9[[2]] = lookup_arp(inport, arp.spa, arp.sha); next;) + table=??(lr_in_lookup_neighbor), priority=100 , match=(inport == "DR-S2" && arp.spa == 172.16.2.0/24 && arp.op == 1), action=(reg9[[2]] = lookup_arp(inport, arp.spa, arp.sha); next;) + table=??(lr_in_lookup_neighbor), priority=100 , match=(inport == "DR-S3" && arp.spa == 172.16.3.0/24 && arp.op == 1), action=(reg9[[2]] = lookup_arp(inport, arp.spa, arp.sha); next;) ]) # Check the flows in lr_in_gw_redirect stage AT_CHECK([grep lr_in_gw_redirect lrflows | grep cr-DR | ovn_strip_lflows], [0], [dnl @@ -13902,7 +13902,7 @@ AT_CHECK([grep -Fe "172.168.0.110" -e "172.168.0.120" -e "10.0.0.3" -e "20.0.0.3 table=??(lr_out_undnat ), priority=100 , match=(ip && ip4.src == 20.0.0.3 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_dnat;) ]) -AT_CHECK([grep -Fe "172.168.0.110" -e "172.168.0.120" -e "10.0.0.3" -e "20.0.0.3" -e "30:54:00:00:00:03" -e "sw0-port1" publicflows | ovn_strip_lflows], [0], [dnl +AT_CHECK([grep -Fe "172.168.0.110" -e "172.168.0.120" -e "10.0.0.3" -e "20.0.0.3" -e "30:54:00:00:00:03" -e "sw0-port1" publicflows | ovn_strip_lflows | grep -v "reg0.*22"], [0], [dnl table=??(ls_in_l2_lkup ), priority=50 , match=(eth.dst == 30:54:00:00:00:03 && is_chassis_resident("sw0-port1")), action=(outport = "public-lr0"; output;) table=??(ls_in_l2_lkup ), priority=75 , match=(eth.src == {00:00:00:00:ff:02, 30:54:00:00:00:03} && eth.dst == ff:ff:ff:ff:ff:ff && (arp.op == 1 || rarp.op == 3 || nd_ns)), action=(outport = "_MC_flood_l2"; output;) table=??(ls_in_l2_lkup ), priority=80 , match=(flags[[1]] == 0 && arp.op == 1 && arp.tpa == 172.168.0.110), action=(clone {outport = "public-lr0"; output; }; outport = "_MC_flood_l2"; output;) diff --git a/tests/ovn.at b/tests/ovn.at index 292ca0dae..926efde08 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -43872,3 +43872,160 @@ wait_for_ports_up sw0-vir OVN_CLEANUP([hv1]) AT_CLEANUP ]) + +AT_SETUP([GARP delivery: gw and extenal ports]) +ovn_start +# +# Configure initial environment +# LR1: down_link <-> LS1: up_link +# set lr_down: gateway port (chassis redirect) bound to hv1 +# LS1: down_vif1 - vif port bound to hv1 +# down_vif2 - vif port bound to hv2 +# down_ext - outher port will be iteratred as localnet, l2gateway +# +# Test: throw ARP annonce request from vitrual ports (down_vif1, down_vif2) +# ensure mac_binding is always updated. +# (Fixing the issue: mac_binding is only updated for packets came from +# down_link's resident chassis) +# throw ARP annonce request from from localnet. +# ensure mac_binding is updated only if localnet bound to same hv as l3dgw +# + +check ovn-nbctl lr-add lr1 +check ovn-nbctl lrp-add lr1 down_link f0:00:00:00:00:f1 192.168.1.1/24 + +check ovn-nbctl ls-add ls1 +check ovn-nbctl lsp-add ls1 up_link +check ovn-nbctl lsp-add ls1 down_vif1 +check ovn-nbctl lsp-add ls1 down_vif2 +check ovn-nbctl lsp-add ls1 down_ext + +check ovn-nbctl set Logical_Switch_Port up_link \ + type=router \ + options:router-port=down_link \ + addresses=router + +check ovn-nbctl lsp-set-addresses down_vif1 'f0:00:00:00:00:01 192.168.1.101' +check ovn-nbctl lsp-set-addresses down_vif2 'f0:00:00:00:00:02 192.168.1.102' + +check ovn-nbctl lsp-set-type down_ext localnet +check ovn-nbctl lsp-set-options down_ext network_name=physnet1 +check ovn-nbctl lrp-set-gateway-chassis down_link hv1 + +net_add n1 + +# Create hypervisor hv1 connected to n1 +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl add-port br-int vif1 -- \ + set Interface vif1 external-ids:iface-id=down_vif1 \ + options:tx_pcap=hv1/vif1-tx.pcap options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport_request=1 + +# Create hypervisor hv2 connected to n1, add localnet here +sim_add hv2 +as hv2 +ovs-vsctl add-br br-phys +ovs-vsctl add-br br-eth0 +ovn_attach n1 br-phys 192.168.0.2 +ovs-vsctl add-port br-int vif2 -- \ + set Interface vif2 external-ids:iface-id=down_vif2 \ + options:tx_pcap=hv2/vif2-tx.pcap options:rxq_pcap=hv2/vif2-rx.pcap \ + ofport_request=1 + +ovs-vsctl set Open_vSwitch . external_ids:ovn-bridge-mappings="physnet1:br-eth0" + +ovs-vsctl add-port br-eth0 vif_ext -- \ + set Interface vif_ext options:tx_pcap=hv2/vif_ext-tx.pcap \ + options:rxq_pcap=hv2/vif_ext-rx.pcap + +# Pre-populate the hypervisors' ARP tables so that we don't lose any +# packets for ARP resolution (native tunneling doesn't queue packets +# for ARP resolution). +OVN_POPULATE_ARP + +wait_for_ports_up +check ovn-nbctl --wait=hv sync + +# +# Annonce 192.168.1.222 from localnet in hv2 +# result: drop, hv2 is not gateway chassis for down_link +# +sha=0200000000ee +tha=000000000000 +spa=`ip_to_hex 192 168 1 222` +tpa=$spa +garp=ffffffffffff${sha}08060001080006040001${sha}${spa}${tha}${tpa} +as hv2 ovs-appctl netdev-dummy/receive vif_ext $garp + +# +# Make hv2 gateway chassis +# Annonce 192.168.1.223 from localnet in hv2 +# result: ok, hv2 is gateway chassis for down_link +# +check ovn-nbctl lrp-set-gateway-chassis down_link hv2 + +wait_row_count Port_Binding 1 logical_port=cr-down_link 'chassis!=[[]]' + +sha=0200000000ee +tha=000000000000 +spa=`ip_to_hex 192 168 1 223` +tpa=$spa +garp=ffffffffffff${sha}08060001080006040001${sha}${spa}${tha}${tpa} +as hv2 ovs-appctl netdev-dummy/receive vif_ext $garp + +# +# Annonce 192.168.1.111, 112 from vif1, vif2 in hv1, hv2 +# result: ok, vif1, vif2 are virtual ports, restrictions are not applied. +# +sha=f00000000001 +tha=000000000000 +spa=`ip_to_hex 192 168 1 111` +tpa=000000000000 +garp=ffffffffffff${sha}08060001080006040001${sha}${spa}${tha}${tpa} +as hv1 ovs-appctl netdev-dummy/receive vif1 $garp + +sha=f00000000002 +tha=000000000000 +spa=`ip_to_hex 192 168 1 112` +tpa=000000000000 +garp=ffffffffffff${sha}08060001080006040001${sha}${spa}${tha}${tpa} +as hv2 ovs-appctl netdev-dummy/receive vif2 $garp + +# +# Set down_ext type to l2gateway +# Annonce 192.168.1.113, 114 from vif1, vif2 in hv1, hv2 +# result: ok, vif1, vif2 are virtual ports, restrictions are not applied. +# +check ovn-nbctl --wait=hv lsp-set-type down_ext l2gateway + +sha=f00000000001 +tha=000000000000 +spa=`ip_to_hex 192 168 1 113` +tpa=000000000000 +garp=ffffffffffff${sha}08060001080006040001${sha}${spa}${tha}${tpa} +as hv1 ovs-appctl netdev-dummy/receive vif1 $garp + +sha=f00000000002 +tha=000000000000 +spa=`ip_to_hex 192 168 1 114` +tpa=000000000000 +garp=ffffffffffff${sha}08060001080006040001${sha}${spa}${tha}${tpa} +as hv2 ovs-appctl netdev-dummy/receive vif2 $garp + +wait_row_count MAC_Binding 1 ip="192.168.1.111" mac='"f0:00:00:00:00:01"' logical_port='"down_link"' +wait_row_count MAC_Binding 1 ip="192.168.1.112" mac='"f0:00:00:00:00:02"' logical_port='"down_link"' +wait_row_count MAC_Binding 1 ip="192.168.1.113" mac='"f0:00:00:00:00:01"' logical_port='"down_link"' +wait_row_count MAC_Binding 1 ip="192.168.1.114" mac='"f0:00:00:00:00:02"' logical_port='"down_link"' +wait_row_count MAC_Binding 1 ip="192.168.1.223" mac='"02:00:00:00:00:ee"' logical_port='"down_link"' +wait_row_count MAC_Binding 0 ip="192.168.1.222" mac='"02:00:00:00:00:ee"' logical_port='"down_link"' + +check ovn-nbctl --wait=hv lsp-set-type down_ext localnet + +OVN_CLEANUP([hv1],[hv2]) + +AT_CLEANUP +]) + -- 2.49.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev