On Wed, Apr 3, 2019 at 10:37 PM Han Zhou <[email protected]> wrote: > From: Han Zhou <[email protected]> > > In the main loop, if the SB DB is disconnected when there is a pending > transaction, there can be busy loop causing 100% CPU of ovn-controller, > until SB DB is connected again. > > The root cause is that when a transaction is pending, ovsdb_idl_loop_run() > will return NULL for ovnsb_idl_txn, and chassis_run() returns NULL when > ovnsb_idl_txn is NULL, so the condition if (br_int && chassis) is not > satisfied and so ofctrl_run() is not executed in the main loop. If there > is any message pending from br-int.mgmt, such as OFPTYPE_BARRIER_REPLY or > OFPTYPE_ECHO_REQUEST, the main loop will be woken up again and again > because those messages are not processed because ofctrl_run() is not > invoked. > > This patch fixes the problem by moving ofctrl_run() above and run it > whenever br_int is not NULL, and not care about chassis because this > function doesn't depend on it. > > Note: the changes of this patch is better to be shown with "-w" because > most of them are indent changes. > > Signed-off-by: Han Zhou <[email protected]> >
Acked-by: Numan Siddique <[email protected]> > --- > Notes: > v1->v2: Updates according to Numan's comments. > > ovn/controller/ovn-controller.c | 197 > ++++++++++++++++++++-------------------- > 1 file changed, 100 insertions(+), 97 deletions(-) > > diff --git a/ovn/controller/ovn-controller.c > b/ovn/controller/ovn-controller.c > index 882cc19..e924909 100644 > --- a/ovn/controller/ovn-controller.c > +++ b/ovn/controller/ovn-controller.c > @@ -721,116 +721,119 @@ main(int argc, char *argv[]) > &active_tunnels, &local_datapaths, > &local_lports, &local_lport_ids); > } > - if (br_int && chassis) { > - struct shash addr_sets = SHASH_INITIALIZER(&addr_sets); > - > addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl), > - &addr_sets); > - struct shash port_groups = > SHASH_INITIALIZER(&port_groups); > - port_groups_init( > - sbrec_port_group_table_get(ovnsb_idl_loop.idl), > - &port_groups); > - > - patch_run(ovs_idl_txn, > - ovsrec_bridge_table_get(ovs_idl_loop.idl), > - ovsrec_open_vswitch_table_get(ovs_idl_loop.idl), > - ovsrec_port_table_get(ovs_idl_loop.idl), > - > sbrec_port_binding_table_get(ovnsb_idl_loop.idl), > - br_int, chassis); > > + if (br_int) { > enum mf_field_id mff_ovn_geneve = ofctrl_run( > br_int, &pending_ct_zones); > > - pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name, > - sbrec_datapath_binding_by_key, > - sbrec_port_binding_by_datapath, > - sbrec_port_binding_by_key, > - sbrec_port_binding_by_name, > - sbrec_mac_binding_by_lport_ip, > - sbrec_dns_table_get(ovnsb_idl_loop.idl), > - br_int, chassis, > - &local_datapaths, &active_tunnels); > - update_ct_zones(&local_lports, &local_datapaths, > &ct_zones, > - ct_zone_bitmap, &pending_ct_zones); > - if (ovs_idl_txn) { > - if (ofctrl_can_put()) { > - stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME, > - time_msec()); > - > - commit_ct_zones(br_int, &pending_ct_zones); > - > - struct hmap flow_table = > HMAP_INITIALIZER(&flow_table); > - lflow_run( > - sbrec_chassis_by_name, > - sbrec_multicast_group_by_name_datapath, > - sbrec_port_binding_by_name, > - > sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl), > - > sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl), > - > sbrec_logical_flow_table_get(ovnsb_idl_loop.idl), > - > sbrec_mac_binding_table_get(ovnsb_idl_loop.idl), > - chassis, > - &local_datapaths, &addr_sets, > - &port_groups, &active_tunnels, > &local_lport_ids, > - &flow_table, &group_table, &meter_table); > - > - if (chassis_id) { > - bfd_run( > - sbrec_chassis_by_name, > + if (chassis) { > + struct shash addr_sets = > SHASH_INITIALIZER(&addr_sets); > + > addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl), > + &addr_sets); > + struct shash port_groups = > SHASH_INITIALIZER(&port_groups); > + port_groups_init( > + sbrec_port_group_table_get(ovnsb_idl_loop.idl), > + &port_groups); > + > + patch_run(ovs_idl_txn, > + ovsrec_bridge_table_get(ovs_idl_loop.idl), > + > ovsrec_open_vswitch_table_get(ovs_idl_loop.idl), > + ovsrec_port_table_get(ovs_idl_loop.idl), > + > sbrec_port_binding_table_get(ovnsb_idl_loop.idl), > + br_int, chassis); > + > + pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name, > + sbrec_datapath_binding_by_key, > sbrec_port_binding_by_datapath, > - > ovsrec_interface_table_get(ovs_idl_loop.idl), > + sbrec_port_binding_by_key, > + sbrec_port_binding_by_name, > + sbrec_mac_binding_by_lport_ip, > + sbrec_dns_table_get(ovnsb_idl_loop.idl), > br_int, chassis, > - > sbrec_sb_global_table_get(ovnsb_idl_loop.idl), > - &local_datapaths); > + &local_datapaths, &active_tunnels); > + update_ct_zones(&local_lports, &local_datapaths, > &ct_zones, > + ct_zone_bitmap, &pending_ct_zones); > + if (ovs_idl_txn) { > + if (ofctrl_can_put()) { > + > stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME, > + time_msec()); > + > + commit_ct_zones(br_int, &pending_ct_zones); > + > + struct hmap flow_table = > HMAP_INITIALIZER(&flow_table); > + lflow_run( > + sbrec_chassis_by_name, > + sbrec_multicast_group_by_name_datapath, > + sbrec_port_binding_by_name, > + > sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl), > + > sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl), > + > sbrec_logical_flow_table_get(ovnsb_idl_loop.idl), > + > sbrec_mac_binding_table_get(ovnsb_idl_loop.idl), > + chassis, > + &local_datapaths, &addr_sets, > + &port_groups, &active_tunnels, > &local_lport_ids, > + &flow_table, &group_table, &meter_table); > + > + if (chassis_id) { > + bfd_run( > + sbrec_chassis_by_name, > + sbrec_port_binding_by_datapath, > + > ovsrec_interface_table_get(ovs_idl_loop.idl), > + br_int, chassis, > + > sbrec_sb_global_table_get(ovnsb_idl_loop.idl), > + &local_datapaths); > + } > + physical_run( > + sbrec_chassis_by_name, > + sbrec_port_binding_by_name, > + sbrec_multicast_group_table_get( > + ovnsb_idl_loop.idl), > + > sbrec_port_binding_table_get(ovnsb_idl_loop.idl), > + mff_ovn_geneve, > + br_int, chassis, &ct_zones, > + &local_datapaths, &local_lports, > + &active_tunnels, > + &flow_table); > + > + stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME, > + time_msec()); > + > + ofctrl_put(&flow_table, &pending_ct_zones, > + > sbrec_meter_table_get(ovnsb_idl_loop.idl), > + > get_nb_cfg(sbrec_sb_global_table_get( > + > ovnsb_idl_loop.idl))); > + > + hmap_destroy(&flow_table); > } > - physical_run( > - sbrec_chassis_by_name, > - sbrec_port_binding_by_name, > - sbrec_multicast_group_table_get( > - ovnsb_idl_loop.idl), > - > sbrec_port_binding_table_get(ovnsb_idl_loop.idl), > - mff_ovn_geneve, > - br_int, chassis, &ct_zones, > - &local_datapaths, &local_lports, > - &active_tunnels, > - &flow_table); > - > - stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME, > - time_msec()); > - > - ofctrl_put(&flow_table, &pending_ct_zones, > - > sbrec_meter_table_get(ovnsb_idl_loop.idl), > - get_nb_cfg(sbrec_sb_global_table_get( > - ovnsb_idl_loop.idl))); > - > - hmap_destroy(&flow_table); > - } > - if (ovnsb_idl_txn) { > - int64_t cur_cfg = ofctrl_get_cur_cfg(); > - if (cur_cfg && cur_cfg != chassis->nb_cfg) { > - sbrec_chassis_set_nb_cfg(chassis, cur_cfg); > + if (ovnsb_idl_txn) { > + int64_t cur_cfg = ofctrl_get_cur_cfg(); > + if (cur_cfg && cur_cfg != chassis->nb_cfg) { > + sbrec_chassis_set_nb_cfg(chassis, > cur_cfg); > + } > } > } > - } > > - if (pending_pkt.conn) { > - char *error = ofctrl_inject_pkt(br_int, > pending_pkt.flow_s, > - &addr_sets, > &port_groups); > - if (error) { > - unixctl_command_reply_error(pending_pkt.conn, > error); > - free(error); > - } else { > - unixctl_command_reply(pending_pkt.conn, NULL); > + if (pending_pkt.conn) { > + char *error = ofctrl_inject_pkt(br_int, > pending_pkt.flow_s, > + &addr_sets, > &port_groups); > + if (error) { > + unixctl_command_reply_error(pending_pkt.conn, > error); > + free(error); > + } else { > + unixctl_command_reply(pending_pkt.conn, NULL); > + } > + pending_pkt.conn = NULL; > + free(pending_pkt.flow_s); > } > - pending_pkt.conn = NULL; > - free(pending_pkt.flow_s); > - } > > - update_sb_monitors(ovnsb_idl_loop.idl, chassis, > - &local_lports, &local_datapaths); > + update_sb_monitors(ovnsb_idl_loop.idl, chassis, > + &local_lports, &local_datapaths); > > - expr_const_sets_destroy(&addr_sets); > - shash_destroy(&addr_sets); > - expr_const_sets_destroy(&port_groups); > - shash_destroy(&port_groups); > + expr_const_sets_destroy(&addr_sets); > + shash_destroy(&addr_sets); > + expr_const_sets_destroy(&port_groups); > + shash_destroy(&port_groups); > + } > } > > /* If we haven't handled the pending packet insertion > -- > 2.1.0 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
