When the OVN SB database runs in clustered mode, the ovn-controller clients connect across all the nodes in a balanced state. When one raft node goes down and comes back online, the ovn-controller clients do not migrate back, which leaves the RAFT DB clients in an unbalanced state. RAFT clients in an unbalanced state put more stress on the SB. This commit introduces a unixctl command, "reconnect REMOTE", which lets the user trigger a forced reconnect to the desired RAFT node and thereby addresses the problem. Note: this patch requires the ovsdb-idl function ovsdb_idl_set_next_remote.
Reported-at:https://mail.openvswitch.org/pipermail/ovs-discuss/2020-August/050518.html Signed-off-by: Zhen Wang <[email protected]> --- controller/ovn-controller.8.xml | 12 ++++++++++++ controller/ovn-controller.c | 27 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml index 66877314c..66f521398 100644 --- a/controller/ovn-controller.8.xml +++ b/controller/ovn-controller.8.xml @@ -507,6 +507,18 @@ local index so that it can interact with the southbound database again. </p> </dd> + + <dt><code>reconnect</code></dt> + <dd> + <p> + Trigger a force reconnect to one specific remote in Open_vSwitch table + external_ids:ovn-remote. + </p> + <p> + This command is intended to use in the event of clustered SB DB has + unbalanced clients across the raft nodes. + </p> + </dd> </dl> </p> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index ea6a436c0..46ed90492 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -68,6 +68,7 @@ VLOG_DEFINE_THIS_MODULE(main); static unixctl_cb_func ovn_controller_exit; +static unixctl_cb_func ovn_controller_reconnect; static unixctl_cb_func ct_zone_list; static unixctl_cb_func extend_table_list; static unixctl_cb_func inject_pkt; @@ -2078,6 +2079,11 @@ struct ovn_controller_exit_args { bool *restart; }; +struct ovn_controller_reconnect_args { + bool trigger; + char *ovn_remote; +}; + int main(int argc, char *argv[]) { @@ -2085,6 +2091,7 @@ main(int argc, char *argv[]) bool exiting; bool restart; struct ovn_controller_exit_args exit_args = {&exiting, &restart}; + struct ovn_controller_reconnect_args reconnect = {false, NULL}; int retval; ovs_cmdl_proctitle_init(argc, argv); @@ -2103,6 +2110,8 @@ main(int argc, char *argv[]) } unixctl_command_register("exit", "", 0, 1, ovn_controller_exit, &exit_args); + unixctl_command_register("reconnect", "", 1, 1, ovn_controller_reconnect, + &reconnect); 
daemonize_complete(); @@ -2511,6 +2520,13 @@ main(int argc, char *argv[]) sb_monitor_all); } } + if (reconnect.trigger && ovsdb_idl_is_connected(ovnsb_idl_loop.idl)) { + VLOG_INFO("User triggered force reconnect to %s", reconnect.ovn_remote); + ovsdb_idl_set_next_remote(ovnsb_idl_loop.idl, reconnect.ovn_remote); + ovsdb_idl_force_reconnect(ovnsb_idl_loop.idl); + free(reconnect.ovn_remote); + reconnect.trigger = false; + } } } @@ -2674,6 +2690,7 @@ main(int argc, char *argv[]) ovsdb_idl_loop_destroy(&ovnsb_idl_loop); free(ovs_remote); + free(reconnect.ovn_remote); service_stop(); exit(retval); @@ -2780,6 +2797,16 @@ ovn_controller_exit(struct unixctl_conn *conn, int argc, unixctl_command_reply(conn, NULL); } +static void +ovn_controller_reconnect(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[], void *reconnect_args_) +{ + struct ovn_controller_reconnect_args *reconnect_args = reconnect_args_; + reconnect_args->trigger = true; + reconnect_args->ovn_remote = xstrdup(argv[1]); + unixctl_command_reply(conn, NULL); +} + static void ct_zone_list(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *ct_zones_) -- 2.20.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
