Re: [devel] [PATCH 3/3] amf: add a sample app for SC status change callback [#2475]

2017-06-27 Thread Gary Lee
Hi Praveen

One comment below:

On 1/6/17, 9:46 pm, "Praveen"  wrote:

---
 samples/amf/sa_aware/Makefile.am |  13 +++-
 samples/amf/sa_aware/README  |   8 +++
 samples/amf/sa_aware/amf_sc_status_app.c | 115 
+++
 3 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 samples/amf/sa_aware/amf_sc_status_app.c

diff --git a/samples/amf/sa_aware/Makefile.am 
b/samples/amf/sa_aware/Makefile.am
index c34f94a..d8dbbd1 100644
--- a/samples/amf/sa_aware/Makefile.am
+++ b/samples/amf/sa_aware/Makefile.am
@@ -24,7 +24,7 @@ EXTRA_DIST = \
AppConfig-nwayactive.xml \
README
 
-bin_PROGRAMS = amf_demo
+bin_PROGRAMS = amf_demo amfscstatusdemo
 
 amf_demo_CPPFLAGS = \
-DSA_EXTENDED_NAME_SOURCE \
@@ -36,10 +36,21 @@ amf_demo_SOURCES = \
 amf_demo_LDADD = \
@SAF_AIS_AMF_LIBS@
 
+amfscstatusdemo_CPPFLAGS = \
+-DSA_EXTENDED_NAME_SOURCE \
+$(AM_CPPFLAGS)
+
+amfscstatusdemo_SOURCES = \
+amf_sc_status_app.c
+
+amfscstatusdemo_LDADD = \
+@SAF_AIS_AMF_LIBS@
+
 install-data-hook:
mkdir -p /opt/amf_demo
cp amf_demo /opt/amf_demo
cp amf_demo_script /opt/amf_demo
+   cp amfscstatusdemo /opt/amf_demo
 
[GL] I had to replace with ‘cp amfscstatusdemo $(DESTDIR)/opt/amf_demo’ to 
reflect the latest version.

 uninstall-hook:
rm -rf /opt/amf_demo
diff --git a/samples/amf/sa_aware/README b/samples/amf/sa_aware/README
index e8e4539..94a639a 100644
--- a/samples/amf/sa_aware/README
+++ b/samples/amf/sa_aware/README
@@ -34,3 +34,11 @@ Some steps to follow:
 6. Run below command for invocation of CSI Attribute Change Callback :
immcfg -a saAmfCSIAttriValue+= 
safCsiAttr=AmfDemo1,safCsi=AmfDemo,safSi=AmfDemo,safApp=AmfDemo1
 
+Steps to run amfscstatusdemo:
+1)Run amfscstatusdemo from a terminal on a payload node.
+2)Bring down controllers.
+3)amfscstatusdemo will exit with message:
+  "Received SC Status Change Callback: SC status Absent"
+4)When SCs are down bring one controler up, amfscstatusdemo will
+  exit with message
+ "Received SC Status Change Callback: SC status Present"
diff --git a/samples/amf/sa_aware/amf_sc_status_app.c 
b/samples/amf/sa_aware/amf_sc_status_app.c
new file mode 100644
index 000..b2585b8
--- /dev/null
+++ b/samples/amf/sa_aware/amf_sc_status_app.c
@@ -0,0 +1,115 @@
+/*  -*- OpenSAF  -*-
+ *
+ * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of 
MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
+ * under the GNU Lesser General Public License Version 2.1, February 1999.
+ * The complete license can be accessed from the following location:
+ * http://opensource.org/licenses/lgpl-license.php
+ * See the Copying file included with the OpenSAF distribution for full
+ * licensing terms.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+char path[256];
+FILE *fp;
+
+/*
+ * @brief   AMF will invoke this callback whenever SCs goes down and
+ *  joins back.
+ */
+static void my_sc_status_cbk(OsafAmfSCStatusT state) {
+  fp = fopen(path, "w");
+  if (fp == NULL) {
+exit(EXIT_FAILURE);
+  }
+  if (state == OSAF_AMF_SC_PRESENT) {
+fprintf(stdout, "\nReceived SC Status Change Callback: SC status 
Present\n\n");
+fprintf(fp, "\nReceived SC Status Change Callback: SC status 
Present\n\n");
+  } else if(state == OSAF_AMF_SC_ABSENT) {
+fprintf(stdout, "\nReceived SC Status Change Callback: SC status 
Absent\n\n");
+fprintf(fp, "\nReceived SC Status Change Callback: SC status 
Absent\n\n");
+  }
+  fclose(fp);
+}
+
+int main(int argc, char **argv)
+{
+  SaAisErrorT rc;
+  struct pollfd fds[1];
+  int inotify_fd, inotify_fd2;
+  SaAmfCallbacksT_4 amf_cbk = {0};
+  SaVersionT ver = {'B', 0x04, 0x01};
+  SaAmfHandleT amf_hdl;
+
+  //Open log file. Log pid, a message and close it.
+  snprintf(path, sizeof(path), "%s/amfscstatusdemo.log", "/tmp");
+  fp = fopen(path, "w");
+  if (fp == NULL) {
+fprintf(stderr, " log file open FAILED:%u", rc);
+goto done;
+  }
+  fprintf(fp, "%d\n", getpid());
+  fclose(fp);
+
+  //Initialize with AMF.
+  rc = saAmfInitialize_4(_hdl, _cbk, );
+  if (rc != SA_AIS_OK) {
+  

Re: [devel] [PATCH 1/1] clmtest: update non-member node_id for new test environment [#2512]

2017-06-27 Thread praveen malviya

Ack.

Thanks,
Praveen

On 27-Jun-17 3:09 PM, Hoang Vo wrote:

clmtest 7 7 check saClmClusterNodeGet with non-member node,
previously designed as 0x2060F.
change test node_id to 0x2990F to avoid future conflict
when test environment is upgraded.
---
  src/clm/apitest/tet_saClmClusterNodeGet.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/clm/apitest/tet_saClmClusterNodeGet.c 
b/src/clm/apitest/tet_saClmClusterNodeGet.c
index 51683f9..c26939d 100644
--- a/src/clm/apitest/tet_saClmClusterNodeGet.c
+++ b/src/clm/apitest/tet_saClmClusterNodeGet.c
@@ -126,7 +126,7 @@ void saClmClusterNodeGet_06(void)
  
  void saClmClusterNodeGet_07(void)

  {
-   nodeId = 132623; /*node is non member*/
+   nodeId = 170255; /*node is non member, 0x2990F*/
safassert(saClmInitialize(, _1, _1),
  SA_AIS_OK);
rc = saClmClusterNodeGet(clmHandle, nodeId, timeout, _1);
@@ -134,7 +134,7 @@ void saClmClusterNodeGet_07(void)
/*test_validate(rc, SA_AIS_ERR_UNAVAILABLE);*/
test_validate(rc, SA_AIS_ERR_NOT_EXIST);
  
-	nodeId = 132623;

+   nodeId = 170255;
safassert(saClmInitialize_4(, _4, _4),
  SA_AIS_OK);
rc = saClmClusterNodeGet_4(clmHandle, nodeId, timeout, _4);



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 0/1] Review Request for ntfd: Ensure mutex is not taken after cnsurvail_thread is canceled V2 [#2508]

2017-06-27 Thread Minh Chau
Summary: ntfd: Ensure mutex is not taken after cnsurvail_thread is canceled V2 
[#2508]
Review request for Ticket(s): 2508
Peer Reviewer(s): Lennart, Praveen
Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE ***
Affected branch(es): develop
Development branch: ticket-2508
Base revision: 829519a4f3a86eb836a55be8301fd5d2befeeec3
Personal repository: git://git.code.sf.net/u/minh-chau/review


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesy
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-
*** EXPLAIN/COMMENT THE PATCH SERIES HERE ***

revision 2fc800033744a0201dd1b8335d64ca7ae7bcfe59
Author: Minh Chau 
Date:   Wed, 28 Jun 2017 14:17:19 +1000

ntfd: Ensure mutex is not taken after cnsurvail_thread is canceled V2 [#2508]

In the scenario of shutting down SC while SC switchover is on going,
ntfd coredump is generated due to failure of pthread_mutex_destroy()
with errorcode:16(EBUSY). That means the mutex had been taken and
was not unlocked at the time phtread_mutex_destroy() is called.

This patch changes the way ntfd stops ntfimcn and cnsruvail_thread()
so that the cnsurvai_thread does not restart ntfimcn in stop sequence.
Therefore, when cnsurval_thread receives cancellation request, this
thread does not do anything that may lead to cancellation point with
a locked mutex.



Complete diffstat:
--
 src/ntf/ntfd/ntfs_imcnutil.c | 58 +---
 1 file changed, 39 insertions(+), 19 deletions(-)


Testing Commands:
-
Run a test of switchover and shutting down SC multiple times
(It was 20 times of test execution that triggered to ntfd coredump)

Testing, Expected Results:
--
no coredump


Conditions of Submission:
-
ack from reviewer


Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.gitconfig file (i.e. user.name, user.email etc)

___ Your computer have a badly configured date and time; confusing the
the threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
do not contain the patch that updates the Doxygen manual.


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, 

Re: [devel] [PATCH 1/1] amfnd: retry on ERR_NOT_EXIST [#2490]

2017-06-27 Thread Gary Lee
Hi

I would like to push this patch on Friday if there are no objections.

For the medium term, we should consider making AMFND an applier to avoid these 
issues?

Thanks
Gary

On 15/6/17, 5:41 pm, "Gary Lee"  wrote:

On a congested network, sometimes a newly created IMM object can take some
time to be available on other nodes.

In our test, a new SU is created on SC-1 and unlocked. But sometimes
it fails on a remote node due to:

2017-05-19 13:55:19 SC-2 osafamfnd[258]: ER amf_saImmOmSearchInitialize_o2 
failed: 12

To get around this, we will retry on SA_AIS_ERR_NOT_EXIST a few times.
---
 src/amf/amfnd/util.cc | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/amf/amfnd/util.cc b/src/amf/amfnd/util.cc
index ed0905ce2..bca642eac 100644
--- a/src/amf/amfnd/util.cc
+++ b/src/amf/amfnd/util.cc
@@ -38,6 +38,9 @@
 #include 
 #include "osaf/configmake.h"
 #include "amf/amfnd/avnd.h"
+#include "base/osaf_time.h"
+
+extern struct ImmutilWrapperProfile immutilWrapperProfile;
 
 const char *presence_state[] = {
 "OUT_OF_RANGE", "UNINSTANTIATED", "INSTANTIATING",
@@ -335,6 +338,18 @@ SaAisErrorT amf_saImmOmSearchInitialize_o2(
   scope, searchOptions, 
searchParam,
   attributeNames, 
);
 }
+  } else if (rc == SA_AIS_ERR_NOT_EXIST) {
+// it is possible for 'rootName' to be not yet available
+// at the local immnd. Retry a few times to allow CCB to be propagated.
+unsigned int nTries = 1;
+while (rc == SA_AIS_ERR_NOT_EXIST &&
+  nTries < immutilWrapperProfile.nTries) {
+  osaf_nanosleep();
+  rc = immutil_saImmOmSearchInitialize_o2(immHandle, rootName.c_str(),
+scope, searchOptions, searchParam,
+attributeNames, );
+  nTries++;
+}
   }
   return rc;
 }
-- 
2.11.0





--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 1/2] ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

2017-06-27 Thread Lennart Lund
In ntfimcn the OM handle shall have a short lifespan. Change from creating a
handle once when ntfimcn process starts to create a handle each time it is
needed and finalize when no longer needed.

Change start handling of ntfimcn (in ntf process) so the ntfimcn process is
started on the active node only since the ntfimcn process is not doing
anything on the standby node. Refactor/simplify code accordingly.
---
 src/ntf/ntfd/ntfs_amf.c |   8 +--
 src/ntf/ntfd/ntfs_imcnutil.c| 105 +--
 src/ntf/ntfimcnd/ntfimcn_imm.c  | 119 
 src/ntf/ntfimcnd/ntfimcn_imm.h  |  20 ++-
 src/ntf/ntfimcnd/ntfimcn_main.h |   1 -
 5 files changed, 158 insertions(+), 95 deletions(-)

diff --git a/src/ntf/ntfd/ntfs_amf.c b/src/ntf/ntfd/ntfs_amf.c
index cda35d6..e9c521d 100644
--- a/src/ntf/ntfd/ntfs_amf.c
+++ b/src/ntf/ntfd/ntfs_amf.c
@@ -263,12 +263,10 @@ response:
checkNotificationList();
}
 done:
-   /* Kills the osafntfimcnd process if current state is Active or Standby
-* and the process is not already running in this state. The process
-* will be restarted by the process surveillance thread. This function
-* will not return until the process is terminated.
+   /* Start the osafntfimcn process if becoming active or stop the process
+* if leaving active state
 */
-   handle_state_ntfimcn(ntfs_cb->ha_state);
+   handle_state_ntfimcn(new_haState);
 
TRACE_LEAVE();
 }
diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c
index dd27a25..3add5db 100644
--- a/src/ntf/ntfd/ntfs_imcnutil.c
+++ b/src/ntf/ntfd/ntfs_imcnutil.c
@@ -41,13 +41,13 @@
 #include "osaf/configmake.h"
 
 typedef struct {
-   SaAmfHAStateT ha_state;
pid_t pid;
pthread_t thread;
+   bool ntfimcn_on;
 } init_params_t;
 
-static init_params_t ipar;
-pthread_mutex_t ntfimcn_mutex;
+static init_params_t ipar = {0, 0, false};
+pthread_mutex_t ntfimcn_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 /**
  * Kill the osafntfimcn child process using previously saved Pid
@@ -190,6 +190,14 @@ done:
TRACE_LEAVE();
 }
 
+/**
+ * Create a ntfimcnd process
+ * Note: The process can run on both active and standby node. It must be given
+ *   a start argument with this information.
+ *
+ * @param ha_state[in] Current HA state
+ * @return PID for the created process
+ */
 static pid_t create_imcnprocess(SaAmfHAStateT ha_state)
 {
char *start_args[3];
@@ -226,6 +234,11 @@ static pid_t create_imcnprocess(SaAmfHAStateT ha_state)
  * Start the imcn process and wait for process to exit.
  * If the process exit then restart it.
  *
+ * TODO: (Lennart #2506) The ntfimcn process is no longer started on the
+ *   standby node. Handling of HA state in the ntfimcn process can 
therefore
+ *   be removed. This also means that giving a start argument would no
+ *   longer be needed.
+ *
  * @param _init_params[in]
  * @return
  */
@@ -240,13 +253,18 @@ static void *cnsurvail_thread(void *_init_params)
 
while (1) {
osaf_mutex_lock_ordie(_mutex);
-   pid = create_imcnprocess(ipar->ha_state);
+   /* Note: Always give HA state SA_AMF_HA_ACTIVE to the imcn
+* process since ntfimcn is no longer started on the standby
+* node */
+   pid = create_imcnprocess(SA_AMF_HA_ACTIVE);
ipar->pid = pid;
osaf_mutex_unlock_ordie(_mutex);
 
/* Wait for child process to exit */
do {
rc = waitpid(ipar->pid, , 0);
+   TRACE("%s: waitpid() released. rc = %d",
+   __FUNCTION__, rc);
} while ((rc == -1) && (errno == EINTR));
 
if ((rc == -1) && (errno == ECHILD)) {
@@ -267,29 +285,26 @@ static void *cnsurvail_thread(void *_init_params)
TRACE("osafntfimcnd process terminated reason %s (%d)",
  exit_rc ? "exit" : "other", exit_stat);
}
+   TRACE_LEAVE();
 }
 
 /**
- * Start the imcn process surveillance thread
- *
- * @param ha_state[in]
+ * Start the imcn process surveillance thread.
+ * The surveillance thread starts the imcn process
+ * Note: This shall be done only if we are active
  */
-static void start_cnprocess(SaAmfHAStateT ha_state)
+static void start_ntfimcn(void)
 {
int rc;
 
TRACE_ENTER();
 
-   rc = pthread_mutex_init(_mutex, NULL);
+   rc = pthread_create(
+   , NULL, cnsurvail_thread, (void *));
if (rc != 0)
osaf_abort(rc);
 
-   ipar.ha_state = ha_state;
-
-   rc =
-   pthread_create(, NULL, cnsurvail_thread, (void *));
-   if (rc != 0)
-   osaf_abort(rc);
+   ipar.ntfimcn_on = true;
 
TRACE_LEAVE();
 }
@@ -297,34 +312,45 @@ static void start_cnprocess(SaAmfHAStateT 

[devel] [PATCH 0/2] Review Request for ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

2017-06-27 Thread Lennart Lund
Summary: ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]
Review request for Ticket(s): 2506
Peer Reviewer(s): praveen.malv...@oracle.com; minh.c...@dektech.com.au
Pull request to: 
Affected branch(es): develop, release
Development branch: ticket-2506
Base revision: 829519a4f3a86eb836a55be8301fd5d2befeeec3
Personal repository: git://git.code.sf.net/u/elunlen/review


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesy
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-
*** EXPLAIN/COMMENT THE PATCH SERIES HERE ***
NOTE: Has removed the fixes for not starting ntfimcn on standby node
That part will conflict with ticket #2508. This can be fixed later a ticket
#2434 already exist for this

revision 71763df94b6d58d6e553fa26cc41dbd7cb7d264a
Author: Lennart Lund 
Date:   Tue, 27 Jun 2017 16:05:44 +0200

ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

In ntfimcn the OM handle shall have a short lifespan. Change from creating a
handle once when ntfimcn process starts to create a handle each time it is
needed and finalize when no longer needed.


Complete diffstat:
--
 src/ntf/ntfimcnd/ntfimcn_imm.c  | 119 
 src/ntf/ntfimcnd/ntfimcn_imm.h  |  20 ++-
 src/ntf/ntfimcnd/ntfimcn_main.h |   1 -
 3 files changed, 90 insertions(+), 50 deletions(-)


Testing Commands:
-
*** LIST THE COMMAND LINE TOOLS/STEPS TO TEST YOUR CHANGES ***


Testing, Expected Results:
--
*** PASTE COMMAND OUTPUTS / TEST RESULTS ***


Conditions of Submission:
-
*** HOW MANY DAYS BEFORE PUSHING, CONSENSUS ETC ***


Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  n  n
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.gitconfig file (i.e. user.name, user.email etc)

___ Your computer have a badly configured date and time; confusing the
the threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
do not contain the patch that updates the Doxygen manual.


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net

[devel] [PATCH 2/2] ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

2017-06-27 Thread Lennart Lund
In ntfimcn the OM handle shall have a short lifespan. Change from creating a
handle once when ntfimcn process starts to create a handle each time it is
needed and finalize when no longer needed.
---
 src/ntf/ntfd/ntfs_amf.c  |   8 ++--
 src/ntf/ntfd/ntfs_imcnutil.c | 105 +--
 2 files changed, 45 insertions(+), 68 deletions(-)

diff --git a/src/ntf/ntfd/ntfs_amf.c b/src/ntf/ntfd/ntfs_amf.c
index e9c521d..cda35d6 100644
--- a/src/ntf/ntfd/ntfs_amf.c
+++ b/src/ntf/ntfd/ntfs_amf.c
@@ -263,10 +263,12 @@ response:
checkNotificationList();
}
 done:
-   /* Start the osafntfimcn process if becoming active or stop the process
-* if leaving active state
+   /* Kills the osafntfimcnd process if current state is Active or Standby
+* and the process is not already running in this state. The process
+* will be restarted by the process surveillance thread. This function
+* will not return until the process is terminated.
 */
-   handle_state_ntfimcn(new_haState);
+   handle_state_ntfimcn(ntfs_cb->ha_state);
 
TRACE_LEAVE();
 }
diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c
index 3add5db..dd27a25 100644
--- a/src/ntf/ntfd/ntfs_imcnutil.c
+++ b/src/ntf/ntfd/ntfs_imcnutil.c
@@ -41,13 +41,13 @@
 #include "osaf/configmake.h"
 
 typedef struct {
+   SaAmfHAStateT ha_state;
pid_t pid;
pthread_t thread;
-   bool ntfimcn_on;
 } init_params_t;
 
-static init_params_t ipar = {0, 0, false};
-pthread_mutex_t ntfimcn_mutex = PTHREAD_MUTEX_INITIALIZER;
+static init_params_t ipar;
+pthread_mutex_t ntfimcn_mutex;
 
 /**
  * Kill the osafntfimcn child process using previously saved Pid
@@ -190,14 +190,6 @@ done:
TRACE_LEAVE();
 }
 
-/**
- * Create a ntfimcnd process
- * Note: The process can run on both active and standby node. It must be given
- *   a start argument with this information.
- *
- * @param ha_state[in] Current HA state
- * @return PID for the created process
- */
 static pid_t create_imcnprocess(SaAmfHAStateT ha_state)
 {
char *start_args[3];
@@ -234,11 +226,6 @@ static pid_t create_imcnprocess(SaAmfHAStateT ha_state)
  * Start the imcn process and wait for process to exit.
  * If the process exit then restart it.
  *
- * TODO: (Lennart #2506) The ntfimcn process is no longer started on the
- *   standby node. Handling of HA state in the ntfimcn process can 
therefore
- *   be removed. This also means that giving a start argument would no
- *   longer be needed.
- *
  * @param _init_params[in]
  * @return
  */
@@ -253,18 +240,13 @@ static void *cnsurvail_thread(void *_init_params)
 
while (1) {
osaf_mutex_lock_ordie(_mutex);
-   /* Note: Always give HA state SA_AMF_HA_ACTIVE to the imcn
-* process since ntfimcn is no longer started on the standby
-* node */
-   pid = create_imcnprocess(SA_AMF_HA_ACTIVE);
+   pid = create_imcnprocess(ipar->ha_state);
ipar->pid = pid;
osaf_mutex_unlock_ordie(_mutex);
 
/* Wait for child process to exit */
do {
rc = waitpid(ipar->pid, , 0);
-   TRACE("%s: waitpid() released. rc = %d",
-   __FUNCTION__, rc);
} while ((rc == -1) && (errno == EINTR));
 
if ((rc == -1) && (errno == ECHILD)) {
@@ -285,26 +267,29 @@ static void *cnsurvail_thread(void *_init_params)
TRACE("osafntfimcnd process terminated reason %s (%d)",
  exit_rc ? "exit" : "other", exit_stat);
}
-   TRACE_LEAVE();
 }
 
 /**
- * Start the imcn process surveillance thread.
- * The surveillance thread starts the imcn process
- * Note: This shall be done only if we are active
+ * Start the imcn process surveillance thread
+ *
+ * @param ha_state[in]
  */
-static void start_ntfimcn(void)
+static void start_cnprocess(SaAmfHAStateT ha_state)
 {
int rc;
 
TRACE_ENTER();
 
-   rc = pthread_create(
-   , NULL, cnsurvail_thread, (void *));
+   rc = pthread_mutex_init(_mutex, NULL);
if (rc != 0)
osaf_abort(rc);
 
-   ipar.ntfimcn_on = true;
+   ipar.ha_state = ha_state;
+
+   rc =
+   pthread_create(, NULL, cnsurvail_thread, (void *));
+   if (rc != 0)
+   osaf_abort(rc);
 
TRACE_LEAVE();
 }
@@ -312,45 +297,34 @@ static void start_ntfimcn(void)
 /**
  * Initialize the configuration change notifier
  *
- * Start ntfimcn if we are active
- *
  * @param ha_state[in]
  */
-void init_ntfimcn(SaAmfHAStateT ha_state)
-{
-   TRACE_ENTER();
-   if (ha_state == SA_AMF_HA_ACTIVE) {
-   start_ntfimcn();
-   }
-   TRACE_LEAVE();
-}
+void init_ntfimcn(SaAmfHAStateT ha_state) { start_cnprocess(ha_state); }
 
 /**
- * 

Re: [devel] [PATCH 1/1] ntfd: Ensure mutex is not taken after cnsurvail_thread is canceled [#2508]

2017-06-27 Thread Lennart Lund
Hi Minh

See my comment inline [Lennart]

Thanks
Lennart

> -Original Message-
> From: minh chau [mailto:minh.c...@dektech.com.au]
> Sent: den 27 juni 2017 07:28
> To: Lennart Lund ;
> praveen.malv...@oracle.com
> Cc: opensaf-devel@lists.sourceforge.net; Anders Widell
> 
> Subject: Re: [PATCH 1/1] ntfd: Ensure mutex is not taken after
> cnsurvail_thread is canceled [#2508]
> 
> Hi Lennart,
> 
> Please see my comments inline [Minh]
> 
> Thanks,
> Minh
> 
> On 27/06/17 00:16, Lennart Lund wrote:
> > Hi Minh
> >
> > Yes, this is what I suggested but see my comments inline [Lennart]
> >
> > It is not a good idea to use ASYNCHRONOUS since some functions used in
> the thread is not allowed if this is the cancellation type. See "Async-cancel-
> safe functions" in pthreads section in Linux man pages.
> [Minh] So we need to re-ensure ASYNCHRONOUS is also not used in
> somewhere else in our code.
> > However, PTHREAD_CANCEL_DEFERRED do create a potential problem. If
> the cancellation request is given when the thread is no longer waiting in
> waitpid() but before it has taken the mutex the thread will then reach
> create_imcnprocess() that will create the imcnprocess. To be able to kill the
> process the Pid is needed and for that purpose the Pid saved in the ipar
> structure is used. However the pid is not saved until after the
> create_imcnprocess() returns. If for some reason the thread is cancelled
> after the process is created but before the Pid is saved the process will not
> be possible to kill (we have no Pid). The current code does not have a
> cancellation point between the fork() and saving of the pid so this should not
> happen but you never know what will happen to the code in the future so I
> suggest that pthread_testcancel() is called just before fork() is called.
> [Minh]: I think this ticket is originated from 2 problems:
> 1. Stopping surveillance should be in reverse order of starting it. That
> means, start sequence currently is: ntfd mainthread ->
> cnsurvail_thread() -> ntfimcnd. However, stop sequence is:
> cnsurvail_thread -> ntfimcnd -> ntfd_mainthread. So that should be the
> reason of the above problem you mentioned, where we don't Pid to kill
> ntfimcnd
> 2. In stopping sequence from top down view of surveillance, the main
> thread is trying to stop cnsurvail_thread, while the cnsurvail_thread is
> going to start ntfimcnd. This is causing trouble and there should be
> someway to get cnsurvail_thread being aware that it does not start
> ntfimcnd in stopping sequence.
> 
> Attaching a simple patch that solves 2 above problems, so we don't have
> to add too much threading stuffs for this ticket
[Lennart] The solution in your patch seems to do the job.
However it is cryptic and hard to understand. This could be fixed by adding 
simple comments in both the thread and stop functions. Re-using the ha_state 
flag here for a completely different purpose than it is intended for and named 
to inform about is most confusing and misleading!
> 
> > As I understand the original problem is that the ntfd process is aborted
> because it is trying to destroy the ntfimcn_mutex while it is locked by the
> thread but why is this happening?
> > As far as I can see there is no cancellation point between where the mutex
> is locked and unlocked and the cancellation type is
> PTHREAD_CANCEL_DEFERRED. The only cancellation point in the thread is the
> waitpid().
> [Minh]: This problem happened I think because one of other cancellation
> points inside log/trace get called, where create_imcnprocess() does
> LOG_ER/TRACE function.
> >
> > Thanks
> > Lennart
> >
> >> -Original Message-
> >> From: minh chau [mailto:minh.c...@dektech.com.au]
> >> Sent: den 23 juni 2017 07:42
> >> To: Lennart Lund;
> >> praveen.malv...@oracle.com
> >> Cc:opensaf-devel@lists.sourceforge.net; Anders Widell
> >> 
> >> Subject: Re: [PATCH 1/1] ntfd: Ensure mutex is not taken after
> >> cnsurvail_thread is canceled [#2508]
> >>
> >> Hi Lennart,
> >>
> >> Using ASYNCHRONOUS in this thread does not sound that bad, since the
> >> thread does not deal with IO, allocation/deallocation, ...
> >> By the way, is the patch below what your suggestion if I understand it
> >> right?
> >>
> >> Thanks,
> >> Minh
> >>
> >> diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c
> >> index dd27a255c..7f817075f 100644
> >> --- a/src/ntf/ntfd/ntfs_imcnutil.c
> >> +++ b/src/ntf/ntfd/ntfs_imcnutil.c
> >> @@ -220,6 +220,20 @@ static pid_t create_imcnprocess(SaAmfHAStateT
> >> ha_state)
> >>TRACE_LEAVE();
> >>return i_pid;
> >>}
> >> +/*
> >> + * Cleanup handler for cnsurvail thread
> >> + * @param: arg[in]: a mutex
> >> + */
> >> +void cnsurvail_cleanup_handler(void* arg)
> >> +{
> >> +  pthread_mutex_t* mutex = (pthread_mutex_t*)(arg);
> >> +  TRACE_ENTER();
> >> +  if (mutex) {
> >> +

[devel] [PATCH 0/1] Review Request for clmtest: update non-member node_id for new test environment [#2512]

2017-06-27 Thread Hoang Vo
Summary: clmtest: update non-member node_id for new test environment [#2512]
Review request for Ticket(s): 2512
Peer Reviewer(s): praveen.malv...@oracle.com
Pull request to: praveen.malv...@oracle.com
Affected branch(es): develop, release
Development branch: ticket-2512
Base revision: 829519a4f3a86eb836a55be8301fd5d2befeeec3
Personal repository: git://git.code.sf.net/u/swgerai/review


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesn
 OpenSAF servicesn
 Core libraries  n
 Samples n
 Tests   y
 Other   n


Comments (indicate scope for each "y" above):
-

revision 712a8968c0cf969868e2473310dd8511cc15212f
Author: Hoang Vo 
Date:   Tue, 27 Jun 2017 16:24:59 +0700

clmtest: update non-member node_id for new test environment [#2512]

clmtest 7 7 check saClmClusterNodeGet with non-member node,
previously designed as 0x2060F.
change test node_id to 0x2990F to avoid future conflict
when test environment is upgraded.



Complete diffstat:
--
 src/clm/apitest/tet_saClmClusterNodeGet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)


Testing Commands:
-
clmtest 7 7

Testing, Expected Results:
--
test case pass with 2SCs 4PLs environment

Conditions of Submission:
-
ACK from maintainer

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.gitconfig file (i.e. user.name, user.email etc)

___ Your computer have a badly configured date and time; confusing the
the threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
do not contain the patch that updates the Doxygen manual.


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

2017-06-27 Thread minh chau

Hi Lennart,

It's no problem to me at all. I'm still reading the patch and will get 
back with comments if any


Thanks
Minh

On 27/06/17 17:32, Lennart Lund wrote:

Hi Minh

Yes you are right, I didn't think about that the #2234 ticket existed. However, to not start the 
imcn process on standby is part of the solution so when this fix is pushed also ticket #2234 can be 
set to "fixed" with a reference to this ticket. I have changed #2234 to accepted and set 
myself as "Owner". You think this is Ok?

Another thing. I have simplified the mutex handling by making the mutex 
permanent and have removed dynamic creating and destroying the mutex. However I 
think it is a good idea to keep the dynamic handling. I will change back to 
this. Any comments?

Thanks
Lennart


-Original Message-
From: minh chau [mailto:minh.c...@dektech.com.au]
Sent: den 27 juni 2017 07:29
To: Lennart Lund ;
praveen.malv...@oracle.com
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] ntf: ntfimcn does not handle
SA_ERR_UNAVAILABLE [#2506]

Hi Lennart,

It seems this patch includes #2234?
I'm still looking at the patch.

Thanks,
Minh

On 26/06/17 22:20, Lennart Lund wrote:

In ntfimcn the OM handle shall have a short lifespan. Change from creating

a

handle once when ntfimcn process starts to create a handle each time it is
needed and finalize when no longer needed.

Change start handling of ntfimcn (in ntf process) so the ntfimcn process is
started on the active node only since the ntfimcn process is not doing
anything on the standby node. Refactor/simplify code accordingly.
---
   src/ntf/ntfd/ntfs_amf.c |   8 +--
   src/ntf/ntfd/ntfs_imcnutil.c| 105 +--
   src/ntf/ntfimcnd/ntfimcn_imm.c  | 119 



   src/ntf/ntfimcnd/ntfimcn_imm.h  |  20 ++-
   src/ntf/ntfimcnd/ntfimcn_main.h |   1 -
   5 files changed, 158 insertions(+), 95 deletions(-)

diff --git a/src/ntf/ntfd/ntfs_amf.c b/src/ntf/ntfd/ntfs_amf.c
index cda35d6..e9c521d 100644
--- a/src/ntf/ntfd/ntfs_amf.c
+++ b/src/ntf/ntfd/ntfs_amf.c
@@ -263,12 +263,10 @@ response:
checkNotificationList();
}
   done:
-   /* Kills the osafntfimcnd process if current state is Active or Standby
-* and the process is not already running in this state. The process
-* will be restarted by the process surveillance thread. This function
-* will not return until the process is terminated.
+   /* Start the osafntfimcn process if becoming active or stop the

process

+* if leaving active state
 */
-   handle_state_ntfimcn(ntfs_cb->ha_state);
+   handle_state_ntfimcn(new_haState);

TRACE_LEAVE();
   }
diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c
index dd27a25..3add5db 100644
--- a/src/ntf/ntfd/ntfs_imcnutil.c
+++ b/src/ntf/ntfd/ntfs_imcnutil.c
@@ -41,13 +41,13 @@
   #include "osaf/configmake.h"

   typedef struct {
-   SaAmfHAStateT ha_state;
pid_t pid;
pthread_t thread;
+   bool ntfimcn_on;
   } init_params_t;

-static init_params_t ipar;
-pthread_mutex_t ntfimcn_mutex;
+static init_params_t ipar = {0, 0, false};
+pthread_mutex_t ntfimcn_mutex = PTHREAD_MUTEX_INITIALIZER;

   /**
* Kill the osafntfimcn child process using previously saved Pid
@@ -190,6 +190,14 @@ done:
TRACE_LEAVE();
   }

+/**
+ * Create a ntfimcnd process
+ * Note: The process can run on both active and standby node. It must be

given

+ *   a start argument with this information.
+ *
+ * @param ha_state[in] Current HA state
+ * @return PID for the created process
+ */
   static pid_t create_imcnprocess(SaAmfHAStateT ha_state)
   {
char *start_args[3];
@@ -226,6 +234,11 @@ static pid_t create_imcnprocess(SaAmfHAStateT

ha_state)

* Start the imcn process and wait for process to exit.
* If the process exit then restart it.
*
+ * TODO: (Lennart #2506) The ntfimcn process is no longer started on the
+ *   standby node. Handling of HA state in the ntfimcn process can

therefore

+ *   be removed. This also means that giving a start argument would no
+ *   longer be needed.
+ *
* @param _init_params[in]
* @return
*/
@@ -240,13 +253,18 @@ static void *cnsurvail_thread(void *_init_params)

while (1) {
osaf_mutex_lock_ordie(_mutex);
-   pid = create_imcnprocess(ipar->ha_state);
+   /* Note: Always give HA state SA_AMF_HA_ACTIVE to the

imcn

+* process since ntfimcn is no longer started on the standby
+* node */
+   pid = create_imcnprocess(SA_AMF_HA_ACTIVE);
ipar->pid = pid;
osaf_mutex_unlock_ordie(_mutex);

/* Wait for child process to exit */
do {
rc = waitpid(ipar->pid, , 0);
+   TRACE("%s: waitpid() released. 

Re: [devel] [PATCH 1/1] amfd: Do not log warning when create (or delete) a existed(or nonexisted) SUSI [#2467]

2017-06-27 Thread praveen malviya

Hi Minh,

One comment inline with [ Praveen].

Thanks
Praveen

On 25-May-17 12:53 PM, Minh Chau wrote:

---
  src/amf/amfd/imm.cc | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc
index 7b1aa333e..26faffcb5 100644
--- a/src/amf/amfd/imm.cc
+++ b/src/amf/amfd/imm.cc
@@ -1893,7 +1893,7 @@ void avd_saImmOiRtObjectCreate_sync(const std::string 
,
  rc = saImmOiRtObjectCreate_2(avd_cb->immOiHandle,
  const_cast(className.c_str()),
  parent_name, attrValues);
-if (rc != SA_AIS_OK) {
+if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) {
LOG_WA("saImmOiRtObjectCreate_2 of className:'%s', parentName:'%s',"
" failed with %u", className.c_str(), parentName.c_str(), rc);
  }
[Praveen] if return code is ERR_EXIST, then it means RT object exists in 
IMM. In such a situation second if block in this function should not 
push it in job queue.

@@ -1946,7 +1946,7 @@ void avd_saImmOiRtObjectDelete_sync(const std::string 
) {
  
if (isImmReady == true) {

  rc = saImmOiRtObjectDelete_o3(avd_cb->immOiHandle, dn.c_str());
-if (rc != SA_AIS_OK) {
+if (rc != SA_AIS_OK  && rc != SA_AIS_ERR_NOT_EXIST) {
LOG_WA("saImmOiRtObjectDelete_o3 of '%s' failed with %u", dn.c_str(), 
rc);
  }
}



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] ntfd: Ensure mutex is not taken after cnsurvail_thread is canceled [#2508]

2017-06-27 Thread minh chau

Hi Hans,

The problem happened at the first place was without trace, I'm guessing 
because of LOG_ER, where write() get hit and the error string might not 
have been printed


static pid_t create_imcnprocess(SaAmfHAStateT ha_state) {
...
i_pid = fork();
if (i_pid == -1) {
*LOG_ER("Failed to fork %s", strerror(errno));*
abort();
} else if (i_pid == 0) {
...
}

The main thread is getting SIG_TERM and its child thread is doing fork()
I have sent another patch to prevent this happen, I think Lennart is 
also checking the idea


If join_ret is not used, NULL should be passed, otherwise it should 
check against PTHREAD_CANCELED.


Thanks!
Minh

On 27/06/17 17:23, Hans Nordebäck wrote:

Hi Minh,

Is trace enabled when this problem occurs? If so trace adds cancellation 
points, e.g. at fork/exec in create_imcnprocess.
In stop_ntfimcn, pthread_join the join_ret variable is not used, use NULL 
instead, looking at the backtrace in the ticket, it's value is -1.

/BR Hans


-Original Message-
From: minh chau [mailto:minh.c...@dektech.com.au]
Sent: den 23 juni 2017 07:42
To: Lennart Lund ; praveen.malv...@oracle.com
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [devel] [PATCH 1/1] ntfd: Ensure mutex is not taken after 
cnsurvail_thread is canceled [#2508]

Hi Lennart,

Using ASYNCHRONOUS in this thread does not sound that bad, since the thread 
does not deal with IO, allocation/deallocation, ...
By the way, is the patch below what your suggestion if I understand it right?

Thanks,
Minh

diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c index 
dd27a255c..7f817075f 100644
--- a/src/ntf/ntfd/ntfs_imcnutil.c
+++ b/src/ntf/ntfd/ntfs_imcnutil.c
@@ -220,6 +220,20 @@ static pid_t create_imcnprocess(SaAmfHAStateT ha_state)
   TRACE_LEAVE();
   return i_pid;
   }
+/*
+ * Cleanup handler for cnsurvail thread
+ * @param: arg[in]: a mutex
+ */
+void cnsurvail_cleanup_handler(void* arg) {
+  pthread_mutex_t* mutex = (pthread_mutex_t*)(arg);
+  TRACE_ENTER();
+  if (mutex) {
+TRACE("osaf_mutex_unlock_ordie");
+osaf_mutex_unlock_ordie(_mutex);
+  }
+  TRACE_LEAVE();
+}

   /**
* Thread:
@@ -240,9 +254,10 @@ static void *cnsurvail_thread(void *_init_params)

   while (1) {
   osaf_mutex_lock_ordie(_mutex);
+pthread_cleanup_push(cnsurvail_cleanup_handler, _mutex);
   pid = create_imcnprocess(ipar->ha_state);
   ipar->pid = pid;
-osaf_mutex_unlock_ordie(_mutex);
+pthread_cleanup_pop(1);

   /* Wait for child process to exit */
   do {
@@ -344,7 +359,9 @@ int stop_ntfimcn(void)

   if (ipar.ha_state != 0) {
   TRACE("%s: Cancel the imcn surveillance thread", __FUNCTION__);
+osaf_mutex_lock_ordie(_mutex);
   rc = pthread_cancel(ipar.thread);
+osaf_mutex_unlock_ordie(_mutex);
   if (rc != 0)
   osaf_abort(rc);
   rc = pthread_join(ipar.thread, _ret);

On 22/06/17 18:57, Lennart Lund wrote:

Hi Minh

This solution will probably work quite well but there is still a minor
risk that the mutex will be taken before the thread is cancelled and there is also a risk 
(see Note2) "Asynchronous cancelability means that the thread can be canceled at any 
time (usually immediately, but the system does not guarantee this)"

Instead:
In the thread a cancellation function that unlocks the mutex can be pushed just 
after the mutex is locked. This function takes the mutex as in-parameter and its 
only function is to unlock the mutex. Instead of unlocking the mutex by calling 
osaf_mutex_unlock_ordie(_mutex) as in the current code the cancellation 
function is popped and executed (it is unlocking the mutex) See 
pthread_cleanup_push() and pthread_cleanup_pop(). With this solution the 
cancellation type should not be changed to ASYNCHORNOUS it shall remain 
PTHREAD_CANCEL_DEFERRED. This will guarantee that the thread will never be 
terminated with the mutex locked.
Note1: Also with this solution the mutex shall be taken in the main thread 
before rc = pthread_cancel(ipar.thread); and released just after.

Note2: I found that changing to cancellation type ASYNCHORNOUS is not safe 
because only a few are async-cancel-safe and for example waitpid() is not one 
of the safe fuctions!

Thanks
Lennart


-Original Message-
From: Minh Chau [mailto:minh.c...@dektech.com.au]
Sent: den 22 juni 2017 06:14
To: Lennart Lund ;
praveen.malv...@oracle.com
Cc: opensaf-devel@lists.sourceforge.net; Minh Hon Chau

Subject: [PATCH 1/1] ntfd: Ensure mutex is not taken after
cnsurvail_thread is canceled [#2508]

In the scenario of shutting down SC while SC switchover is on going,
ntfd coredump is generated due to failure of pthread_mutex_destroy()
with errorcode:16(EBUSY). That means the mutex had been taken and was
not unlocked at the time phtread_mutex_destroy() is called.

One solution is 

Re: [devel] [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]

2017-06-27 Thread praveen malviya
If rdegetrole does not exist (on payload) then also else block will be 
excuted and else block is for payload. So present patch will always work.


Thanks,
Praveen

On 27-Jun-17 12:50 PM, Lennart Lund wrote:

Hi Praveen

I removed the check if rdegetrole exist on the node but I think it is a good 
idea to still have this check.
This means that the check if we are on a payload node should be to first check 
if rdegetrole exist and if it does then check if the return code is Fail.

Thanks
Lennart


-Original Message-
From: praveen malviya [mailto:praveen.malv...@oracle.com]
Sent: den 27 juni 2017 08:29
To: Lennart Lund 
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]

Ack.

Thanks
Praveen

On 26-Jun-17 8:35 PM, Lennart Lund wrote:

Fix incorrect detection of node type and misleading information to the user

of ntftest

---
   src/ntf/apitest/tet_ntf_clm.c | 14 --
   1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/ntf/apitest/tet_ntf_clm.c b/src/ntf/apitest/tet_ntf_clm.c
index 5b1d8c6..0f2c7d0 100644
--- a/src/ntf/apitest/tet_ntf_clm.c
+++ b/src/ntf/apitest/tet_ntf_clm.c
@@ -444,13 +444,13 @@ __attribute__((constructor)) static void

ntf_clm_constructor(void)

// printf("lock_cmd:'%s'\n",lock_cmd);
// printf("unlock_cmd:'%s'\n",unlock_cmd);

-   // Add these test cases on other than active controller.
+   // The following tests are added only if not running on an Active
+   // controller node
int rc = 0;
char role[80];
-   rc = system("which rdegetrole");
+   rc = system("rdegetrole");
if (rc == 0) {
-   printf("This is a controller node\n");
-   // Command rdegetrole exists means a controller.
+   // Command rdegetrole returning OK means controller node.
memset(buffer, '\0', sizeof(buffer));
memset(role, '\0', sizeof(role));
strcpy(buffer, "rdegetrole");
@@ -459,14 +459,16 @@ __attribute__((constructor)) static void

ntf_clm_constructor(void)

if ((ptr = strchr(role, '\n')) != NULL)
*ptr = '\0';
if (!strcmp((char *)role, "ACTIVE")) {
-   // printf("Active controller node\n");
+   printf("Active controller node. "
+   "Do not run CLM tests\n");
pclose(fp);
return;
}
+   printf("Standby controller node. Run CLM tests\n");
}
pclose(fp);
} else {
-   printf("This is a payload node\n");
+   printf("Payload node. Run CLM tests\n");
}

test_suite_add(40, "Ntf CLM Integration test suite\n");



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]

2017-06-27 Thread Lennart Lund
Hi Minh

Yes you are right, I didn't think about that the #2234 ticket existed. However, 
to not start the imcn process on standby is part of the solution so when this 
fix is pushed also ticket #2234 can be set to "fixed" with a reference to this 
ticket. I have changed #2234 to accepted and set myself as "Owner". You think 
this is Ok?

Another thing. I have simplified the mutex handling by making the mutex 
permanent and have removed dynamic creating and destroying the mutex. However I 
think it is a good idea to keep the dynamic handling. I will change back to 
this. Any comments?

Thanks
Lennart

> -Original Message-
> From: minh chau [mailto:minh.c...@dektech.com.au]
> Sent: den 27 juni 2017 07:29
> To: Lennart Lund ;
> praveen.malv...@oracle.com
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1/1] ntf: ntfimcn does not handle
> SA_ERR_UNAVAILABLE [#2506]
> 
> Hi Lennart,
> 
> It seems this patch includes #2234?
> I'm still looking at the patch.
> 
> Thanks,
> Minh
> 
> On 26/06/17 22:20, Lennart Lund wrote:
> > In ntfimcn the OM handle shall have a short lifespan. Change from creating
> a
> > handle once when ntfimcn process starts to create a handle each time it is
> > needed and finalize when no longer needed.
> >
> > Change start handling of ntfimcn (in ntf process) so the ntfimcn process is
> > started on the active node only since the ntfimcn process is not doing
> > anything on the standby node. Refactor/simplify code accordingly.
> > ---
> >   src/ntf/ntfd/ntfs_amf.c |   8 +--
> >   src/ntf/ntfd/ntfs_imcnutil.c| 105 +--
> >   src/ntf/ntfimcnd/ntfimcn_imm.c  | 119 
> 
> >   src/ntf/ntfimcnd/ntfimcn_imm.h  |  20 ++-
> >   src/ntf/ntfimcnd/ntfimcn_main.h |   1 -
> >   5 files changed, 158 insertions(+), 95 deletions(-)
> >
> > diff --git a/src/ntf/ntfd/ntfs_amf.c b/src/ntf/ntfd/ntfs_amf.c
> > index cda35d6..e9c521d 100644
> > --- a/src/ntf/ntfd/ntfs_amf.c
> > +++ b/src/ntf/ntfd/ntfs_amf.c
> > @@ -263,12 +263,10 @@ response:
> > checkNotificationList();
> > }
> >   done:
> > -   /* Kills the osafntfimcnd process if current state is Active or Standby
> > -* and the process is not already running in this state. The process
> > -* will be restarted by the process surveillance thread. This function
> > -* will not return until the process is terminated.
> > +   /* Start the osafntfimcn process if becoming active or stop the
> process
> > +* if leaving active state
> >  */
> > -   handle_state_ntfimcn(ntfs_cb->ha_state);
> > +   handle_state_ntfimcn(new_haState);
> >
> > TRACE_LEAVE();
> >   }
> > diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c
> > index dd27a25..3add5db 100644
> > --- a/src/ntf/ntfd/ntfs_imcnutil.c
> > +++ b/src/ntf/ntfd/ntfs_imcnutil.c
> > @@ -41,13 +41,13 @@
> >   #include "osaf/configmake.h"
> >
> >   typedef struct {
> > -   SaAmfHAStateT ha_state;
> > pid_t pid;
> > pthread_t thread;
> > +   bool ntfimcn_on;
> >   } init_params_t;
> >
> > -static init_params_t ipar;
> > -pthread_mutex_t ntfimcn_mutex;
> > +static init_params_t ipar = {0, 0, false};
> > +pthread_mutex_t ntfimcn_mutex = PTHREAD_MUTEX_INITIALIZER;
> >
> >   /**
> >* Kill the osafntfimcn child process using previously saved Pid
> > @@ -190,6 +190,14 @@ done:
> > TRACE_LEAVE();
> >   }
> >
> > +/**
> > + * Create a ntfimcnd process
> > + * Note: The process can run on both active and standby node. It must be
> given
> > + *   a start argument with this information.
> > + *
> > + * @param ha_state[in] Current HA state
> > + * @return PID for the created process
> > + */
> >   static pid_t create_imcnprocess(SaAmfHAStateT ha_state)
> >   {
> > char *start_args[3];
> > @@ -226,6 +234,11 @@ static pid_t create_imcnprocess(SaAmfHAStateT
> ha_state)
> >* Start the imcn process and wait for process to exit.
> >* If the process exit then restart it.
> >*
> > + * TODO: (Lennart #2506) The ntfimcn process is no longer started on the
> > + *   standby node. Handling of HA state in the ntfimcn process can
> therefore
> > + *   be removed. This also means that giving a start argument would no
> > + *   longer be needed.
> > + *
> >* @param _init_params[in]
> >* @return
> >*/
> > @@ -240,13 +253,18 @@ static void *cnsurvail_thread(void *_init_params)
> >
> > while (1) {
> > osaf_mutex_lock_ordie(_mutex);
> > -   pid = create_imcnprocess(ipar->ha_state);
> > +   /* Note: Always give HA state SA_AMF_HA_ACTIVE to the
> imcn
> > +* process since ntfimcn is no longer started on the standby
> > +* node */
> > +   pid = create_imcnprocess(SA_AMF_HA_ACTIVE);
> > ipar->pid = pid;
> > osaf_mutex_unlock_ordie(_mutex);
> >
> > /* Wait for child process to exit */
> > do {

Re: [devel] [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]

2017-06-27 Thread Lennart Lund
Hi Praveen

I removed the check if rdegetrole exist on the node but I think it is a good 
idea to still have this check.
This means that the check if we are on a payload node should be to first check 
if rdegetrole exist and if it does then check if the return code is Fail.

Thanks
Lennart

> -Original Message-
> From: praveen malviya [mailto:praveen.malv...@oracle.com]
> Sent: den 27 juni 2017 08:29
> To: Lennart Lund 
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]
> 
> Ack.
> 
> Thanks
> Praveen
> 
> On 26-Jun-17 8:35 PM, Lennart Lund wrote:
> > Fix incorrect detection of node type and misleading information to the user
> of ntftest
> > ---
> >   src/ntf/apitest/tet_ntf_clm.c | 14 --
> >   1 file changed, 8 insertions(+), 6 deletions(-)
> >
> > diff --git a/src/ntf/apitest/tet_ntf_clm.c b/src/ntf/apitest/tet_ntf_clm.c
> > index 5b1d8c6..0f2c7d0 100644
> > --- a/src/ntf/apitest/tet_ntf_clm.c
> > +++ b/src/ntf/apitest/tet_ntf_clm.c
> > @@ -444,13 +444,13 @@ __attribute__((constructor)) static void
> ntf_clm_constructor(void)
> > // printf("lock_cmd:'%s'\n",lock_cmd);
> > // printf("unlock_cmd:'%s'\n",unlock_cmd);
> >
> > -   // Add these test cases on other than active controller.
> > +   // The following tests are added only if not running on an Active
> > +   // controller node
> > int rc = 0;
> > char role[80];
> > -   rc = system("which rdegetrole");
> > +   rc = system("rdegetrole");
> > if (rc == 0) {
> > -   printf("This is a controller node\n");
> > -   // Command rdegetrole exists means a controller.
> > +   // Command rdegetrole returning OK means controller node.
> > memset(buffer, '\0', sizeof(buffer));
> > memset(role, '\0', sizeof(role));
> > strcpy(buffer, "rdegetrole");
> > @@ -459,14 +459,16 @@ __attribute__((constructor)) static void
> ntf_clm_constructor(void)
> > if ((ptr = strchr(role, '\n')) != NULL)
> > *ptr = '\0';
> > if (!strcmp((char *)role, "ACTIVE")) {
> > -   // printf("Active controller node\n");
> > +   printf("Active controller node. "
> > +   "Do not run CLM tests\n");
> > pclose(fp);
> > return;
> > }
> > +   printf("Standby controller node. Run CLM tests\n");
> > }
> > pclose(fp);
> > } else {
> > -   printf("This is a payload node\n");
> > +   printf("Payload node. Run CLM tests\n");
> > }
> >
> > test_suite_add(40, "Ntf CLM Integration test suite\n");
> >
--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]

2017-06-27 Thread praveen malviya

Ack.

Thanks
Praveen

On 26-Jun-17 8:35 PM, Lennart Lund wrote:

Fix incorrect detection of node type and misleading information to the user of 
ntftest
---
  src/ntf/apitest/tet_ntf_clm.c | 14 --
  1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/ntf/apitest/tet_ntf_clm.c b/src/ntf/apitest/tet_ntf_clm.c
index 5b1d8c6..0f2c7d0 100644
--- a/src/ntf/apitest/tet_ntf_clm.c
+++ b/src/ntf/apitest/tet_ntf_clm.c
@@ -444,13 +444,13 @@ __attribute__((constructor)) static void 
ntf_clm_constructor(void)
// printf("lock_cmd:'%s'\n",lock_cmd);
// printf("unlock_cmd:'%s'\n",unlock_cmd);
  
-	// Add these test cases on other than active controller.

+   // The following tests are added only if not running on an Active
+   // controller node
int rc = 0;
char role[80];
-   rc = system("which rdegetrole");
+   rc = system("rdegetrole");
if (rc == 0) {
-   printf("This is a controller node\n");
-   // Command rdegetrole exists means a controller.
+   // Command rdegetrole returning OK means controller node.
memset(buffer, '\0', sizeof(buffer));
memset(role, '\0', sizeof(role));
strcpy(buffer, "rdegetrole");
@@ -459,14 +459,16 @@ __attribute__((constructor)) static void 
ntf_clm_constructor(void)
if ((ptr = strchr(role, '\n')) != NULL)
*ptr = '\0';
if (!strcmp((char *)role, "ACTIVE")) {
-   // printf("Active controller node\n");
+   printf("Active controller node. "
+   "Do not run CLM tests\n");
pclose(fp);
return;
}
+   printf("Standby controller node. Run CLM tests\n");
}
pclose(fp);
} else {
-   printf("This is a payload node\n");
+   printf("Payload node. Run CLM tests\n");
}
  
  	test_suite_add(40, "Ntf CLM Integration test suite\n");




--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] amfd: Accept ERR_NOT_EXIST on stopping track callback [#2469]

2017-06-27 Thread praveen malviya

Ack, code review only.

Thanks,
Praveen

On 26-Jun-17 9:25 AM, Minh Chau wrote:

During switchover, standby amfd tries to stop clm tracking,
amfd first got ERR_TIMEOUT and second tries got ERR_NOT_EXIST.

In CLM spec, ERR_TIMEOUT return means the stop clm tracking
may or may not be successful. If the first call doesn't succeed,
the second call will be OK. In the scope of this ticket, the
first already succeeded, therefore amfd got ERR_NOT_EXIST.

Note that ERR_NOT_EXIST doesn't mean that the CLM handle is
invalid (or BAD HANDLE), thus this error code can be ignored
since standby amfd doesn't need to track clm anymore.
---
  src/amf/amfd/clm.cc  | 4 
  src/amf/amfd/role.cc | 5 +++--
  2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc
index 4f69d4a58..1b451e9e7 100644
--- a/src/amf/amfd/clm.cc
+++ b/src/amf/amfd/clm.cc
@@ -495,6 +495,10 @@ SaAisErrorT avd_clm_track_stop(void) {
  if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT ||
  error == SA_AIS_ERR_UNAVAILABLE) {
LOG_WA("Failed to stop cluster tracking %u", error);
+} else if (error == SA_AIS_ERR_NOT_EXIST) {
+  /* track changes was not started or stopped successfully */
+  LOG_WA("Failed to stop cluster tracking %u", error);
+  avd_cb->is_clm_track_started = false;
  } else {
LOG_ER("Failed to stop cluster tracking %u", error);
  }
diff --git a/src/amf/amfd/role.cc b/src/amf/amfd/role.cc
index 85cde7fb7..ec13c3bd8 100644
--- a/src/amf/amfd/role.cc
+++ b/src/amf/amfd/role.cc
@@ -1105,7 +1105,7 @@ uint32_t amfd_switch_actv_qsd(AVD_CL_CB *cb) {
  
  uint32_t amfd_switch_qsd_stdby(AVD_CL_CB *cb) {

uint32_t status = NCSCC_RC_SUCCESS;
-
+  SaAisErrorT ais_rc;
TRACE_ENTER();
LOG_NO("Switching Quiesced --> StandBy");
  
@@ -1139,7 +1139,8 @@ uint32_t amfd_switch_qsd_stdby(AVD_CL_CB *cb) {

}
  
if (cb->is_clm_track_started == true) {

-if (avd_clm_track_stop() != SA_AIS_OK) {
+ais_rc = avd_clm_track_stop();
+if (ais_rc != SA_AIS_OK && ais_rc != SA_AIS_ERR_NOT_EXIST) {
LOG_ER("Failed to stop cluster tracking after switch over");
  }
}



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel