On Thu, Nov 30, 2017 at 11:35 PM, Numan Siddique <[email protected]>
wrote:

> This patch needs some more testing. In one of my deployments I see no master
> is promoted and all the ovn db resources running in 3 nodes are in slave
> mode. In my previous testing it worked fine though.
>
> I will fix this and submit a v3.
>

I tested it again thoroughly and it is working fine. I noticed the issue
with tripleo deployment with ovn db servers running as docker services
(using pacemaker bundle resource). I have submitted v3 here -
https://patchwork.ozlabs.org/patch/844113/ which fixes that.  The tripleo
docker deployment requires a small fix in puppet-tripleo here -
https://github.com/openstack/puppet-tripleo/blob/master/manifests/profile/pacemaker/ovn_dbs_bundle.pp#L149.
We need to remove 'container-attribute-target=host' and with the v3 of the
patch it works fine.

Thanks
Numan





> Thanks
> Numan
>
>
> On Thu, Nov 30, 2017 at 4:12 PM, <[email protected]> wrote:
>
>> From: Numan Siddique <[email protected]>
>>
>> Pacemaker Resource agent periodically calls the OVN OCF's "monitor" action
>> to check the status. But the OVN OCF script doesn't add the
>> action "monitor" for the role "Master" because of which the pacemaker
>> resource agent does not call the "monitor" action at all for the master.
>> In case OVN db servers exit for some reason this goes totally undetected
>> and none of the standby nodes is promoted to master.
>>
>> This patch adds the monitor action for the "Master" role. Also, the monitor
>> action currently does not check the status of ovn-northd (if manage_northd
>> is yes).
>> This patch also checks for the status of the ovn-northd in the monitor
>> action
>> for the "Master" role. If any of the ovsdb-server or ovn-northd is not
>> running,
>> monitor action will return OCF_NOT_RUNNING and this will cause the
>> pacemaker
>> to restart the OVN OCF resource.
>>
>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1512568
>> Signed-off-by: Numan Siddique <[email protected]>
>> CC: Russel Bryant <[email protected]>
>> ---
>>
>> v1 -> v2
>> -----
>> Reverted the change to use 'ocf_attribute_target' as this function is
>> only available in pacemaker 1.1.16-12
>>
>>  ovn/utilities/ovndb-servers.ocf | 49 ++++++++++++++++++++++++++++++
>> +++--------
>>  1 file changed, 40 insertions(+), 9 deletions(-)
>>
>> diff --git a/ovn/utilities/ovndb-servers.ocf
>> b/ovn/utilities/ovndb-servers.ocf
>> index 3f3008700..4b87c9e20 100755
>> --- a/ovn/utilities/ovndb-servers.ocf
>> +++ b/ovn/utilities/ovndb-servers.ocf
>> @@ -120,7 +120,11 @@ ovsdb_server_metadata() {
>>      <action name="stop"         timeout="20s" />
>>      <action name="promote"      timeout="50s" />
>>      <action name="demote"       timeout="50s" />
>> -    <action name="monitor"      timeout="20s"  depth="0" interval="10s"
>> />
>> +    <action name="monitor"      timeout="20s"  depth="0" interval="30s"
>> />
>> +    <action name="monitor"      timeout="20s"  depth="0" interval="10s"
>> +     role="Master" />
>> +    <action name="monitor"      timeout="20s"  depth="0" interval="30s"
>> +     role="Slave"/>
>>      <action name="meta-data"    timeout="5s" />
>>      <action name="validate-all" timeout="20s" />
>>    </actions>
>> @@ -247,7 +251,7 @@ ovsdb_server_master_update() {
>>  }
>>
>>  ovsdb_server_monitor() {
>> -    ovsdb_server_check_status
>> +    ovsdb_server_check_status $@
>>      rc=$?
>>
>>      ovsdb_server_master_update $rc
>> @@ -262,8 +266,21 @@ ovsdb_server_check_status() {
>>          return $OCF_SUCCESS
>>      fi
>>
>> +    check_northd="no"
>> +    if [ "$MANAGE_NORTHD" == "yes" ] && [ "$1" != "ignore_northd" ]; then
>> +        check_northd="yes"
>> +    fi
>> +
>>      if [[ $sb_status == "running/active" && $nb_status ==
>> "running/active" ]]; then
>> -        return $OCF_RUNNING_MASTER
>> +        if [ "$check_northd" == "yes" ]; then
>> +            # Verify if ovn-northd is running or not.
>> +            ${OVN_CTL} status_northd | grep "ovn-northd is running"
>> +            if [ "$?" == "0" ] ; then
>> +                return $OCF_RUNNING_MASTER
>> +            fi
>> +        else
>> +            return $OCF_RUNNING_MASTER
>> +        fi
>>      fi
>>
>>      # TODO: What about service running but not in either state above?
>> @@ -317,8 +334,13 @@ ovsdb_server_start() {
>>      $@ start_ovsdb
>>
>>      while [ 1 = 1 ]; do
>> -        # It is important that we don't return until we're in a
>> functional state
>> -        ovsdb_server_monitor
>> +        # It is important that we don't return until we're in a
>> functional
>> +        # state. When checking the status of the ovsdb-server's ignore
>> northd.
>> +        # It is possible that when the resource is restarted
>> ovsdb-server's
>> +        # can be started as masters and ovn-northd would not have been
>> started.
>> +        # ovn-northd will be started once a node is promoted to master
>> and
>> +        # 'manage_northd' is set to yes.
>> +        ovsdb_server_monitor ignore_northd
>>          rc=$?
>>          case $rc in
>>              $OCF_SUCCESS)        return $rc;;
>> @@ -350,7 +372,7 @@ ovsdb_server_stop() {
>>          ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd
>>      fi
>>
>> -    ovsdb_server_check_status
>> +    ovsdb_server_check_status ignore_northd
>>      case $? in
>>          $OCF_NOT_RUNNING)    return ${OCF_SUCCESS};;
>>      esac
>> @@ -360,7 +382,7 @@ ovsdb_server_stop() {
>>
>>      while [ 1 = 1 ]; do
>>          # It is important that we don't return until we're stopped
>> -        ovsdb_server_check_status
>> +        ovsdb_server_check_status ignore_northd
>>          rc=$?
>>          case $rc in
>>          $OCF_SUCCESS)
>> @@ -381,7 +403,7 @@ ovsdb_server_stop() {
>>  }
>>
>>  ovsdb_server_promote() {
>> -    ovsdb_server_check_status
>> +    ovsdb_server_check_status ignore_northd
>>      rc=$?
>>      case $rc in
>>          ${OCF_SUCCESS}) ;;
>> @@ -395,6 +417,11 @@ ovsdb_server_promote() {
>>      ${OVN_CTL} promote_ovnnb
>>      ${OVN_CTL} promote_ovnsb
>>
>> +    if [ "$MANAGE_NORTHD" = "yes" ]; then
>> +        # Startup ovn-northd service
>> +        ${OVN_CTL} --ovn-manage-ovsdb=no start_northd
>> +    fi
>> +
>>      ocf_log debug "ovndb_servers: Promoting $host_name as the master"
>>      # Record ourselves so that the agent has a better chance of doing
>>      # the right thing at startup
>> @@ -404,7 +431,7 @@ ovsdb_server_promote() {
>>  }
>>
>>  ovsdb_server_demote() {
>> -    ovsdb_server_check_status
>> +    ovsdb_server_check_status ignore_northd
>>      if [ $? = $OCF_NOT_RUNNING ]; then
>>          return $OCF_NOT_RUNNING
>>      fi
>> @@ -452,6 +479,10 @@ ovsdb_server_demote() {
>>          ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${INVAL
>> ID_IP_ADDRESS}
>>      fi
>>
>> +    if [ "$MANAGE_NORTHD" = "yes" ]; then
>> +        # Stop ovn-northd service
>> +        ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd
>> +    fi
>>      ovsdb_server_master_update $OCF_SUCCESS
>>      return $OCF_SUCCESS
>>  }
>> --
>> 2.14.3
>>
>>
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to