On Tue, Aug 1, 2017 at 9:19 AM, Russell Bryant <russ...@ovn.org> wrote:
>
> Add native support for active-standby HA in ovn-northd by having each
> instance attempt to acquire an OVSDB lock.  Only the instance of
> ovn-northd that currently holds the lock will make active changes to
> the OVN databases.
>
> Signed-off-by: Russell Bryant <russ...@ovn.org>
> ---
>  NEWS                        |  1 +
>  ovn/northd/ovn-northd.8.xml |  9 +++++++++
>  ovn/northd/ovn-northd.c     | 40 +++++++++++++++++++++++++++++++---------
>  3 files changed, 41 insertions(+), 9 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index facea0228..f3cdd2443 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -49,6 +49,7 @@ Post-v2.7.0
>         one chassis is specified, OVN will manage high availability for
that
>         gateway.
>       * Add support for ACL logging.
> +     * ovn-northd now has native support for active-standby high
availability.
>     - Tracing with ofproto/trace now traces through recirculation.
>     - OVSDB:
>       * New support for role-based access control (see ovsdb-server(1)).
> diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
> index 1527e8a60..0d85ec0d2 100644
> --- a/ovn/northd/ovn-northd.8.xml
> +++ b/ovn/northd/ovn-northd.8.xml
> @@ -72,6 +72,15 @@
>        </dl>
>      </p>
>
> +    <h1>Active-Standby for High Availability</h1>
> +    <p>
> +      You may run <code>ovn-northd</code> more than once in an OVN
deployment.
> +      OVN will automatically ensure that only one of them is active at a
time.
> +      If multiple instances of <code>ovn-northd</code> are running and
the
> +      active <code>ovn-northd</code> fails, one of the hot standby
instances
> +      of <code>ovn-northd</code> will automatically take over.
> +    </p>
> +
>      <h1>Logical Flow Table Structure</h1>
>
>      <p>
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index 10e0c7ce0..3d2be4267 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -6531,6 +6531,12 @@ main(int argc, char *argv[])
>      ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
>      ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);
>
> +    /* Ensure that only a single ovn-northd is active in the deployment
by
> +     * acquiring a lock called "ovn_northd" on the southbound database
> +     * and then only performing DB transactions if the lock is held. */
> +    ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
> +    bool had_lock = false;
> +
>      /* Main loop. */
>      exiting = false;
>      while (!exiting) {
> @@ -6541,15 +6547,29 @@ main(int argc, char *argv[])
>              .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
>          };
>
> -        struct chassis_index chassis_index;
> -        chassis_index_init(&chassis_index, ctx.ovnsb_idl);
> +        if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
> +            VLOG_INFO("ovn-northd lock acquired. "
> +                      "This ovn-northd instance is now active.");
> +            had_lock = true;
> +        } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
> +            VLOG_INFO("ovn-northd lock lost. "
> +                      "This ovn-northd instance is now on standby.");

Should it try lock again, if we want it to be standby? Otherwise, this
instance won't have a chance to be active any more.

> +            had_lock = false;
> +        }
>
> -        ovnnb_db_run(&ctx, &chassis_index, &ovnsb_idl_loop);
> -        ovnsb_db_run(&ctx, &ovnsb_idl_loop);
> -        if (ctx.ovnsb_txn) {
> -            check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
> -            check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
> -            check_and_update_rbac(&ctx);
> +        struct chassis_index chassis_index;
> +        bool destroy_chassis_index = false;
> +        if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
> +            chassis_index_init(&chassis_index, ctx.ovnsb_idl);
> +            destroy_chassis_index = true;
> +
> +            ovnnb_db_run(&ctx, &chassis_index, &ovnsb_idl_loop);
> +            ovnsb_db_run(&ctx, &ovnsb_idl_loop);
> +            if (ctx.ovnsb_txn) {
> +                check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
> +                check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
> +                check_and_update_rbac(&ctx);
> +            }
>          }
>
>          unixctl_server_run(unixctl);
> @@ -6565,7 +6585,9 @@ main(int argc, char *argv[])
>              exiting = true;
>          }
>
> -        chassis_index_destroy(&chassis_index);
> +        if (destroy_chassis_index) {
> +            chassis_index_destroy(&chassis_index);
> +        }
>      }
>
>      unixctl_server_destroy(unixctl);
> --
> 2.13.3
>

Acked-by: Han Zhou <zhou...@gmail.com>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to