Github user DaveBirdsall commented on a diff in the pull request:
https://github.com/apache/trafodion/pull/1457#discussion_r171323042
--- Diff: core/sqf/monitor/linux/zclient.cxx ---
@@ -488,6 +488,103 @@ int CZClient::ZooExistRetry(zhandle_t *zh, const char
*path, int watch, struct S
return rc;
}
+const char* CZClient::WaitForAndReturnMaster( bool doWait )
+{
+ const char method_name[] = "CZClient::WaitForAndReturnMaster";
+ TRACE_ENTRY;
+
+ bool found = false;
+ int rc = -1;
+ int retries = 0;
+ Stat stat;
+
+ struct String_vector nodes = {0, NULL};
+ stringstream ss;
+ ss.str( "" );
+ ss << zkRootNode_.c_str()
+ << zkRootNodeInstance_.c_str()
+ << ZCLIENT_MASTER_ZNODE;
+ string masterMonitor( ss.str( ) );
+
+ // wait for 3 minutes for giving up.
+ while ( (!found) && (retries < 180))
+ {
+ if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+ {
+ trace_printf( "%s@%d trafCluster=%s\n"
+ , method_name, __LINE__, masterMonitor.c_str() );
+ }
+ // Verify the existence of the parent ZCLIENT_MASTER_ZNODE
+ rc = ZooExistRetry( ZHandle, masterMonitor.c_str( ), 0, &stat );
+
+ if ( rc == ZNONODE )
+ {
+ if (doWait == false)
+ {
+ break;
+ }
+ continue;
--- End diff --
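Should we sleep in this path? Otherwise, when doWait is true, the loop
seems to spin: the `continue` re-checks the znode immediately, with no
delay and without incrementing `retries` on this path.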
---