From: Han Zhou <[email protected]> In the main loop, if the SB DB is disconnected when there is a pending transaction, there can be busy loop causing 100% CPU of ovn-controller, until SB DB is connected again.
The root cause is that when a transaction is pending, ovsdb_idl_loop_run() will return NULL for ovnsb_idl_txn, and chassis_run() returns NULL when ovnsb_idl_txn is NULL, so the condition if (br_int && chassis) is not satisfied and so ofctrl_run() is not executed in the main loop. If there is any message pending from br-int.mgmt, such as OFPTYPE_BARRIER_REPLY or OFPTYPE_ECHO_REQUEST, the main loop will be woken up again and again because those messages are not processed because ofctrl_run() is not invoked. This patch fixes the problem by moving ofctrl_run() above and run it whenever br_int is not NULL, and not care about chassis because this function doesn't depend on it. Note: the changes of this patch is better to be shown with "-w" because most of them are indent changes. Signed-off-by: Han Zhou <[email protected]> --- Notes: v1->v2: Updates according to Numan's comments. ovn/controller/ovn-controller.c | 197 ++++++++++++++++++++-------------------- 1 file changed, 100 insertions(+), 97 deletions(-) diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c index 882cc19..e924909 100644 --- a/ovn/controller/ovn-controller.c +++ b/ovn/controller/ovn-controller.c @@ -721,116 +721,119 @@ main(int argc, char *argv[]) &active_tunnels, &local_datapaths, &local_lports, &local_lport_ids); } - if (br_int && chassis) { - struct shash addr_sets = SHASH_INITIALIZER(&addr_sets); - addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl), - &addr_sets); - struct shash port_groups = SHASH_INITIALIZER(&port_groups); - port_groups_init( - sbrec_port_group_table_get(ovnsb_idl_loop.idl), - &port_groups); - - patch_run(ovs_idl_txn, - ovsrec_bridge_table_get(ovs_idl_loop.idl), - ovsrec_open_vswitch_table_get(ovs_idl_loop.idl), - ovsrec_port_table_get(ovs_idl_loop.idl), - sbrec_port_binding_table_get(ovnsb_idl_loop.idl), - br_int, chassis); + if (br_int) { enum mf_field_id mff_ovn_geneve = ofctrl_run( br_int, &pending_ct_zones); - pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name, - sbrec_datapath_binding_by_key, - sbrec_port_binding_by_datapath, - sbrec_port_binding_by_key, - sbrec_port_binding_by_name, - sbrec_mac_binding_by_lport_ip, - sbrec_dns_table_get(ovnsb_idl_loop.idl), - br_int, chassis, - &local_datapaths, &active_tunnels); - update_ct_zones(&local_lports, &local_datapaths, &ct_zones, - ct_zone_bitmap, &pending_ct_zones); - if (ovs_idl_txn) { - if (ofctrl_can_put()) { - stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME, - time_msec()); - - commit_ct_zones(br_int, &pending_ct_zones); - - struct hmap flow_table = HMAP_INITIALIZER(&flow_table); - lflow_run( - sbrec_chassis_by_name, - sbrec_multicast_group_by_name_datapath, - sbrec_port_binding_by_name, - sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl), - sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl), - sbrec_logical_flow_table_get(ovnsb_idl_loop.idl), - sbrec_mac_binding_table_get(ovnsb_idl_loop.idl), - chassis, - &local_datapaths, &addr_sets, - &port_groups, &active_tunnels, &local_lport_ids, - &flow_table, &group_table, &meter_table); - - if (chassis_id) { - bfd_run( - sbrec_chassis_by_name, + if (chassis) { + struct shash addr_sets = SHASH_INITIALIZER(&addr_sets); + addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl), + &addr_sets); + struct shash port_groups = SHASH_INITIALIZER(&port_groups); + port_groups_init( + sbrec_port_group_table_get(ovnsb_idl_loop.idl), + &port_groups); + + patch_run(ovs_idl_txn, + ovsrec_bridge_table_get(ovs_idl_loop.idl), + ovsrec_open_vswitch_table_get(ovs_idl_loop.idl), + ovsrec_port_table_get(ovs_idl_loop.idl), + sbrec_port_binding_table_get(ovnsb_idl_loop.idl), + br_int, chassis); + + pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name, + sbrec_datapath_binding_by_key, sbrec_port_binding_by_datapath, - ovsrec_interface_table_get(ovs_idl_loop.idl), + sbrec_port_binding_by_key, + sbrec_port_binding_by_name, + sbrec_mac_binding_by_lport_ip, + sbrec_dns_table_get(ovnsb_idl_loop.idl), br_int, chassis, - sbrec_sb_global_table_get(ovnsb_idl_loop.idl), - &local_datapaths); + &local_datapaths, &active_tunnels); + update_ct_zones(&local_lports, &local_datapaths, &ct_zones, + ct_zone_bitmap, &pending_ct_zones); + if (ovs_idl_txn) { + if (ofctrl_can_put()) { + stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME, + time_msec()); + + commit_ct_zones(br_int, &pending_ct_zones); + + struct hmap flow_table = HMAP_INITIALIZER(&flow_table); + lflow_run( + sbrec_chassis_by_name, + sbrec_multicast_group_by_name_datapath, + sbrec_port_binding_by_name, + sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl), + sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl), + sbrec_logical_flow_table_get(ovnsb_idl_loop.idl), + sbrec_mac_binding_table_get(ovnsb_idl_loop.idl), + chassis, + &local_datapaths, &addr_sets, + &port_groups, &active_tunnels, &local_lport_ids, + &flow_table, &group_table, &meter_table); + + if (chassis_id) { + bfd_run( + sbrec_chassis_by_name, + sbrec_port_binding_by_datapath, + ovsrec_interface_table_get(ovs_idl_loop.idl), + br_int, chassis, + sbrec_sb_global_table_get(ovnsb_idl_loop.idl), + &local_datapaths); + } + physical_run( + sbrec_chassis_by_name, + sbrec_port_binding_by_name, + sbrec_multicast_group_table_get( + ovnsb_idl_loop.idl), + sbrec_port_binding_table_get(ovnsb_idl_loop.idl), + mff_ovn_geneve, + br_int, chassis, &ct_zones, + &local_datapaths, &local_lports, + &active_tunnels, + &flow_table); + + stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME, + time_msec()); + + ofctrl_put(&flow_table, &pending_ct_zones, + sbrec_meter_table_get(ovnsb_idl_loop.idl), + get_nb_cfg(sbrec_sb_global_table_get( + ovnsb_idl_loop.idl))); + + hmap_destroy(&flow_table); } - physical_run( - sbrec_chassis_by_name, - sbrec_port_binding_by_name, - sbrec_multicast_group_table_get( - ovnsb_idl_loop.idl), - sbrec_port_binding_table_get(ovnsb_idl_loop.idl), - mff_ovn_geneve, - br_int, chassis, &ct_zones, - &local_datapaths, &local_lports, - &active_tunnels, - &flow_table); - - stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME, - time_msec()); - - ofctrl_put(&flow_table, &pending_ct_zones, - sbrec_meter_table_get(ovnsb_idl_loop.idl), - get_nb_cfg(sbrec_sb_global_table_get( - ovnsb_idl_loop.idl))); - - hmap_destroy(&flow_table); - } - if (ovnsb_idl_txn) { - int64_t cur_cfg = ofctrl_get_cur_cfg(); - if (cur_cfg && cur_cfg != chassis->nb_cfg) { - sbrec_chassis_set_nb_cfg(chassis, cur_cfg); + if (ovnsb_idl_txn) { + int64_t cur_cfg = ofctrl_get_cur_cfg(); + if (cur_cfg && cur_cfg != chassis->nb_cfg) { + sbrec_chassis_set_nb_cfg(chassis, cur_cfg); + } } } - } - if (pending_pkt.conn) { - char *error = ofctrl_inject_pkt(br_int, pending_pkt.flow_s, - &addr_sets, &port_groups); - if (error) { - unixctl_command_reply_error(pending_pkt.conn, error); - free(error); - } else { - unixctl_command_reply(pending_pkt.conn, NULL); + if (pending_pkt.conn) { + char *error = ofctrl_inject_pkt(br_int, pending_pkt.flow_s, + &addr_sets, &port_groups); + if (error) { + unixctl_command_reply_error(pending_pkt.conn, error); + free(error); + } else { + unixctl_command_reply(pending_pkt.conn, NULL); + } + pending_pkt.conn = NULL; + free(pending_pkt.flow_s); } - pending_pkt.conn = NULL; - free(pending_pkt.flow_s); - } - update_sb_monitors(ovnsb_idl_loop.idl, chassis, - &local_lports, &local_datapaths); + update_sb_monitors(ovnsb_idl_loop.idl, chassis, + &local_lports, &local_datapaths); - expr_const_sets_destroy(&addr_sets); - shash_destroy(&addr_sets); - expr_const_sets_destroy(&port_groups); - shash_destroy(&port_groups); + expr_const_sets_destroy(&addr_sets); + shash_destroy(&addr_sets); + expr_const_sets_destroy(&port_groups); + shash_destroy(&port_groups); + } } /* If we haven't handled the pending packet insertion -- 2.1.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
