On Thu, Nov 30, 2017 at 11:35 PM, Numan Siddique <[email protected]> wrote:
> This patch needs some more testing. In one of my deployments I see no master > is promoted and all the ovn db resources running in 3 nodes are in slave > mode. In my previous testing it worked fine though. > > I will fix this and submit a v3. > I tested it again thoroughly and it is working fine. I noticed the issue with tripleo deployment with ovn db servers running as docker services (using pacemaker bundle resource). I have submitted v3 here - https://patchwork.ozlabs.org/patch/844113/ which fixes that. The tripleo docker deployment requires a small fix in puppet-tripleo here - https://github.com/openstack/puppet-tripleo/blob/master/manifests/profile/pacemaker/ovn_dbs_bundle.pp#L149. We need to remove 'container-attribute-target=host' and with the v3 of the patch it works fine. Thanks Numan > Thanks > Numan > > > On Thu, Nov 30, 2017 at 4:12 PM, <[email protected]> wrote: > >> From: Numan Siddique <[email protected]> >> >> Pacemaker Resource agent periodically calls the OVN OCF's "monitor" action >> periodically to check the status. But the OVN OCF script doesn't add the >> action "monitor" for the role "Master" because of which the pacemaker >> resource agent do not call the "monitor" action at all for the master. >> In case OVN db servers exit for some reason this totally gets undetected >> and one of the standby node is not promoted to master. >> >> This patch adds the monitor action for "Master" role. Also the monitor >> action do not check for the status of the ovn-northd (if manage_northd is >> yes). >> This patch also checks for the status of the ovn-northd in the monitor >> action >> for the "Master" role. If any of the ovsdb-server or ovn-northd is not >> running, >> monitor action will return OCF_NOT_RUNNING and this will cause the >> pacemaker >> to restart the OVN OCF resource. 
>> >> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1512568 >> Signed-off-by: Numan Siddique <[email protected]> >> CC: Russel Bryant <[email protected]> >> --- >> >> v1 -> v2 >> ----- >> Reverted the change to use 'ocf_attribute_target' as this function is >> only available in pacemaker 1.1.16-12 >> >> ovn/utilities/ovndb-servers.ocf | 49 ++++++++++++++++++++++++++++++ >> +++-------- >> 1 file changed, 40 insertions(+), 9 deletions(-) >> >> diff --git a/ovn/utilities/ovndb-servers.ocf >> b/ovn/utilities/ovndb-servers.ocf >> index 3f3008700..4b87c9e20 100755 >> --- a/ovn/utilities/ovndb-servers.ocf >> +++ b/ovn/utilities/ovndb-servers.ocf >> @@ -120,7 +120,11 @@ ovsdb_server_metadata() { >> <action name="stop" timeout="20s" /> >> <action name="promote" timeout="50s" /> >> <action name="demote" timeout="50s" /> >> - <action name="monitor" timeout="20s" depth="0" interval="10s" >> /> >> + <action name="monitor" timeout="20s" depth="0" interval="30s" >> /> >> + <action name="monitor" timeout="20s" depth="0" interval="10s" >> + role="Master" /> >> + <action name="monitor" timeout="20s" depth="0" interval="30s" >> + role="Slave"/> >> <action name="meta-data" timeout="5s" /> >> <action name="validate-all" timeout="20s" /> >> </actions> >> @@ -247,7 +251,7 @@ ovsdb_server_master_update() { >> } >> >> ovsdb_server_monitor() { >> - ovsdb_server_check_status >> + ovsdb_server_check_status $@ >> rc=$? >> >> ovsdb_server_master_update $rc >> @@ -262,8 +266,21 @@ ovsdb_server_check_status() { >> return $OCF_SUCCESS >> fi >> >> + check_northd="no" >> + if [ "$MANAGE_NORTHD" == "yes" ] && [ "$1" != "ignore_northd" ]; then >> + check_northd="yes" >> + fi >> + >> if [[ $sb_status == "running/active" && $nb_status == >> "running/active" ]]; then >> - return $OCF_RUNNING_MASTER >> + if [ "$check_northd" == "yes" ]; then >> + # Verify if ovn-northd is running or not. >> + ${OVN_CTL} status_northd | grep "ovn-northd is running" >> + if [ "$?" 
== "0" ] ; then >> + return $OCF_RUNNING_MASTER >> + fi >> + else >> + return $OCF_RUNNING_MASTER >> + fi >> fi >> >> # TODO: What about service running but not in either state above? >> @@ -317,8 +334,13 @@ ovsdb_server_start() { >> $@ start_ovsdb >> >> while [ 1 = 1 ]; do >> - # It is important that we don't return until we're in a >> functional state >> - ovsdb_server_monitor >> + # It is important that we don't return until we're in a >> functional >> + # state. When checking the status of the ovsdb-server's ignore >> northd. >> + # It is possible that when the resource is restarted >> ovsdb-server's >> + # can be started as masters and ovn-northd would not have been >> started. >> + # ovn-northd will be started once a node is promoted to master >> and >> + # 'manage_northd' is set to yes. >> + ovsdb_server_monitor ignore_northd >> rc=$? >> case $rc in >> $OCF_SUCCESS) return $rc;; >> @@ -350,7 +372,7 @@ ovsdb_server_stop() { >> ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd >> fi >> >> - ovsdb_server_check_status >> + ovsdb_server_check_status ignore_northd >> case $? in >> $OCF_NOT_RUNNING) return ${OCF_SUCCESS};; >> esac >> @@ -360,7 +382,7 @@ ovsdb_server_stop() { >> >> while [ 1 = 1 ]; do >> # It is important that we don't return until we're stopped >> - ovsdb_server_check_status >> + ovsdb_server_check_status ignore_northd >> rc=$? >> case $rc in >> $OCF_SUCCESS) >> @@ -381,7 +403,7 @@ ovsdb_server_stop() { >> } >> >> ovsdb_server_promote() { >> - ovsdb_server_check_status >> + ovsdb_server_check_status ignore_northd >> rc=$? 
>> case $rc in >> ${OCF_SUCCESS}) ;; >> @@ -395,6 +417,11 @@ ovsdb_server_promote() { >> ${OVN_CTL} promote_ovnnb >> ${OVN_CTL} promote_ovnsb >> >> + if [ "$MANAGE_NORTHD" = "yes" ]; then >> + # Startup ovn-northd service >> + ${OVN_CTL} --ovn-manage-ovsdb=no start_northd >> + fi >> + >> ocf_log debug "ovndb_servers: Promoting $host_name as the master" >> # Record ourselves so that the agent has a better chance of doing >> # the right thing at startup >> @@ -404,7 +431,7 @@ ovsdb_server_promote() { >> } >> >> ovsdb_server_demote() { >> - ovsdb_server_check_status >> + ovsdb_server_check_status ignore_northd >> if [ $? = $OCF_NOT_RUNNING ]; then >> return $OCF_NOT_RUNNING >> fi >> @@ -452,6 +479,10 @@ ovsdb_server_demote() { >> ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${INVAL >> ID_IP_ADDRESS} >> fi >> >> + if [ "$MANAGE_NORTHD" = "yes" ]; then >> + # Stop ovn-northd service >> + ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd >> + fi >> ovsdb_server_master_update $OCF_SUCCESS >> return $OCF_SUCCESS >> } >> -- >> 2.14.3 >> >> > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
