Code review fixes.

Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/3d7855b6
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/3d7855b6
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/3d7855b6

Branch: refs/heads/master
Commit: 3d7855b6f64733ad3776fc421cb598883acbb6bf
Parents: bded0e8
Author: Zalo Correa <[email protected]>
Authored: Wed Feb 28 15:23:31 2018 -0800
Committer: Zalo Correa <[email protected]>
Committed: Wed Feb 28 15:23:31 2018 -0800

----------------------------------------------------------------------
 core/sqf/monitor/linux/cluster.cxx | 128 ++++++++++++++++----------------
 core/sqf/monitor/linux/cluster.h   |  25 +++----
 core/sqf/monitor/linux/pnode.cxx   |   1 -
 core/sqf/monitor/linux/tmsync.cxx  |  10 +--
 core/sqf/monitor/linux/zclient.cxx |   8 +-
 5 files changed, 87 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/3d7855b6/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx
index d1b3e91..83ea923 100644
--- a/core/sqf/monitor/linux/cluster.cxx
+++ b/core/sqf/monitor/linux/cluster.cxx
@@ -380,45 +380,46 @@ void CCluster::AssignMonitorLeader( int pnid )
     int i = 0;
     int rc = 0;
     
-    int lMonitorLeaderPNid = MonitorLeaderPNid;
+    int monitorLeaderPNid = monitorLeaderPNid_;
     CNode *node = NULL;
 
-    if (MonitorLeaderPNid != pnid) 
+    if (monitorLeaderPNid_ != pnid) 
     {
         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | 
TRACE_SYNC | TRACE_TMSYNC))
         {
             trace_printf( "%s@%d" " - (MasterMonitor) returning, pnid %d != 
monitorLead %d\n"
-                        , method_name, __LINE__, pnid, MonitorLeaderPNid );
+                        , method_name, __LINE__, pnid, monitorLeaderPNid_ );
         }
-         return;
+        TRACE_EXIT;
+        return;
     }
 
     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | 
TRACE_SYNC | TRACE_TMSYNC))
     {
         trace_printf( "%s@%d" " - (MasterMonitor) Node "  "%d" " MonitorLeader 
failed!\n"
-                    , method_name, __LINE__, MonitorLeaderPNid );
+                    , method_name, __LINE__, monitorLeaderPNid_ );
     }
 
     for (i=0; i<GetConfigPNodesMax(); i++)
     {
-        lMonitorLeaderPNid++;
+        monitorLeaderPNid++;
 
-        if (lMonitorLeaderPNid == GetConfigPNodesMax())
+        if (monitorLeaderPNid == GetConfigPNodesMax())
         {
-            lMonitorLeaderPNid = 0; // restart with nid 0
+            monitorLeaderPNid = 0; // restart with nid 0
         }
 
-        if (lMonitorLeaderPNid == pnid)
+        if (monitorLeaderPNid == pnid)
         {
             continue; // this is the node that is going down, skip it
         }
 
-        if (Node[lMonitorLeaderPNid] == NULL)
+        if (Node[monitorLeaderPNid] == NULL)
         {
             continue;
         }
 
-        node = Node[lMonitorLeaderPNid];
+        node = Node[monitorLeaderPNid];
 
         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | 
TRACE_SYNC | TRACE_TMSYNC))
         {
@@ -438,11 +439,11 @@ void CCluster::AssignMonitorLeader( int pnid )
             continue; // skip this node for any of the above reasons 
         }  
 
-        MonitorLeaderPNid = node->GetPNid();
+        monitorLeaderPNid_ = node->GetPNid();
 
         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | 
TRACE_SYNC | TRACE_TMSYNC))
         {
-            trace_printf("%s@%d" " - Node "  "%d" " is the new 
MonitorLeaderPNid." "\n", method_name, __LINE__, MonitorLeaderPNid);
+            trace_printf("%s@%d" " - Node "  "%d" " is the new 
monitorLeaderPNid_." "\n", method_name, __LINE__, monitorLeaderPNid_);
         }
 
         if (ZClientEnabled)
@@ -483,7 +484,7 @@ void CCluster::AssignMonitorLeader( int pnid )
     TRACE_EXIT;
 }
 
-// Assigns a new TMLeader if given pnid is same as TmLeaderNid 
+// Assigns a new TMLeader if given pnid is same as tmLeaderNid_ 
 // TmLeader is a logical node num. 
 // pnid has gone down, so if that node was previously the TM leader, a new one 
needs to be chosen.
 void CCluster::AssignTmLeader( int pnid, bool checkProcess )
@@ -495,15 +496,15 @@ void CCluster::AssignTmLeader( int pnid, bool 
checkProcess )
     CNode *node = NULL;
     CProcess *process = NULL;
 
-    int TmLeaderPNid = LNode[TmLeaderNid]->GetNode()->GetPNid();
+    int TmLeaderPNid = LNode[tmLeaderNid_]->GetNode()->GetPNid();
 
     if (TmLeaderPNid != pnid) 
     {
-        node = LNode[TmLeaderNid]->GetNode();
+        node = LNode[tmLeaderNid_]->GetNode();
 
         if (checkProcess)
         {
-            process = LNode[TmLeaderNid]->GetProcessLByType( ProcessType_DTM );
+            process = LNode[tmLeaderNid_]->GetProcessLByType( ProcessType_DTM 
);
             if (process)
             {
                 if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | 
TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC))
@@ -544,7 +545,7 @@ void CCluster::AssignTmLeader( int pnid, bool checkProcess )
     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | 
TRACE_SYNC | TRACE_TMSYNC))
     {
         trace_printf( "%s@%d" " - Node "  "%d" " TmLeader failed! 
(checkProcess=%d)\n"
-                    , method_name, __LINE__, TmLeaderNid, checkProcess );
+                    , method_name, __LINE__, tmLeaderNid_, checkProcess );
     }
 
     for (i=0; i<GetConfigPNodesMax(); i++)
@@ -586,11 +587,11 @@ void CCluster::AssignTmLeader( int pnid, bool 
checkProcess )
             continue; // skip this node for any of the above reasons 
         }  
 
-        TmLeaderNid = node->GetFirstLNode()->GetNid();
+        tmLeaderNid_ = node->GetFirstLNode()->GetNid();
 
         if (checkProcess)
         {
-            process = LNode[TmLeaderNid]->GetProcessLByType( ProcessType_DTM );
+            process = LNode[tmLeaderNid_]->GetProcessLByType( ProcessType_DTM 
);
             if (!process)
             {
                 continue; // skip this node no DTM process exists
@@ -599,7 +600,7 @@ void CCluster::AssignTmLeader( int pnid, bool checkProcess )
 
         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | 
TRACE_SYNC | TRACE_TMSYNC))
         {
-            trace_printf("%s@%d" " - Node "  "%d" " is the new TmLeader." 
"\n", method_name, __LINE__, TmLeaderNid);
+            trace_printf("%s@%d" " - Node "  "%d" " is the new TmLeader." 
"\n", method_name, __LINE__, tmLeaderNid_);
         }
 
         break;
@@ -618,14 +619,13 @@ CCluster::CCluster (void)
       ,epollFD_(-1),
       Node (NULL),
       LNode (NULL),
-      TmSyncPNid (-1),
-      CurNodes (0),
-      CurProcs (0),
+      tmSyncPNid_ (-1),
+      currentNodes_ (0),
       configPNodesCount_ (-1),
       configPNodesMax_ (-1),
-      NodeMap (NULL),
-      TmLeaderNid (-1),
-      MonitorLeaderPNid (-1),
+      nodeMap_ (NULL),
+      tmLeaderNid_ (-1),
+      monitorLeaderPNid_ (-1),
       tmReadyCount_(0),
       minRecvCount_(4096),
       recvBuffer_(NULL),
@@ -795,10 +795,10 @@ CCluster::~CCluster (void)
     delete [] otherMonRank_;
     delete [] socks_;
     delete [] sockPorts_;
-    if (NodeMap)
+    if (nodeMap_)
     {
-        delete [] NodeMap;
-        NodeMap = NULL;
+        delete [] nodeMap_;
+        nodeMap_ = NULL;
     }
 
     delete [] recvBuffer2_;
@@ -2416,7 +2416,7 @@ void CCluster::HandleOtherNodeMsg (struct 
internal_msg_def *recv_msg,
                     // Begin a Slave Sync Start
                     if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
                         trace_printf("%s@%d - Slave Sync Start on Node %s 
(pnid=%d)\n", method_name, __LINE__, Node[pnid]->GetName(), pnid);
-                    TmSyncPNid = pnid;
+                    tmSyncPNid_ = pnid;
                     Node[pnid]->SetTmSyncState( recv_msg->u.sync.state );
                     if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
                     {
@@ -2430,12 +2430,12 @@ void CCluster::HandleOtherNodeMsg (struct 
internal_msg_def *recv_msg,
                         trace_printf("%s@%d - Sync State Collision! Node %s 
(pnid=%d) TmSyncState=(%d)(%s)\n", method_name, __LINE__, MyNode->GetName(), 
MyPNID, MyNode->GetTmSyncState(), SyncStateString( MyNode->GetTmSyncState()) );
                     if ( MyNode->GetTmSyncState() == SyncState_Continue )
                     {
-                        if ( pnid > TmSyncPNid ) 
+                        if ( pnid > tmSyncPNid_ ) 
                             // highest node id will continue
                         {
                             // They take priority ... we abort
                             if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
-                                trace_printf("%s@%d - Aborting Slave Sync 
Start on node %s (pnid=%d)\n", method_name, __LINE__, 
Node[Monitor->TmSyncPNid]->GetName(), Monitor->TmSyncPNid);
+                                trace_printf("%s@%d - Aborting Slave Sync 
Start on node %s (pnid=%d)\n", method_name, __LINE__, 
Node[Monitor->tmSyncPNid_]->GetName(), Monitor->tmSyncPNid_);
                             MyNode->SetTmSyncState( SyncState_Null );
                             if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
                                 trace_printf("%s@%d - Node %s (pnid=%d) 
TmSyncState updated (%d)(%s)\n", method_name, __LINE__, MyNode->GetName(), 
MyPNID, MyNode->GetTmSyncState(), SyncStateString( MyNode->GetTmSyncState() ) );
@@ -2443,7 +2443,7 @@ void CCluster::HandleOtherNodeMsg (struct 
internal_msg_def *recv_msg,
                             // Continue with other node's Slave TmSync Start 
request
                             if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
                                 trace_printf("%s@%d - Slave Sync Start on node 
%s (pnid=%d)\n", method_name, __LINE__, Node[pnid]->GetName(), pnid);
-                            TmSyncPNid = pnid;
+                            tmSyncPNid_ = pnid;
                             Node[pnid]->SetTmSyncState( recv_msg->u.sync.state 
);
                             if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
                             {
@@ -2467,7 +2467,7 @@ void CCluster::HandleOtherNodeMsg (struct 
internal_msg_def *recv_msg,
                             // Continue with other node's Slave TmSync Start 
request
                             if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
                                 trace_printf("%s@%d - Slave Sync Start on node 
%s (pnid=%d)\n", method_name, __LINE__, Node[pnid]->GetName(), pnid);
-                            TmSyncPNid = pnid;
+                            tmSyncPNid_ = pnid;
                             Node[pnid]->SetTmSyncState( recv_msg->u.sync.state 
);
                             if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
                             {
@@ -2783,9 +2783,9 @@ void CCluster::HandleMyNodeMsg (struct internal_msg_def 
*recv_msg,
         case SyncType_TmData:
             if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
                 trace_printf("%s@%d    - TMSYNC(TmData) on Node %s 
(pnid=%d)\n", method_name, __LINE__, Node[MyPNID]->GetName(), MyPNID);
-            TmSyncPNid = MyPNID;
+            tmSyncPNid_ = MyPNID;
             if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
-                trace_printf("%s@%d    - Sync communicated, TmSyncPNid=%d\n", 
method_name, __LINE__, TmSyncPNid);
+                trace_printf("%s@%d    - Sync communicated, tmSyncPNid_=%d\n", 
method_name, __LINE__, tmSyncPNid_);
             if ( ! MyNode->IsSpareNode() && MyNode->GetPhase() != Phase_Ready )
             {
                 MyNode->CheckActivationPhase();
@@ -2974,7 +2974,7 @@ void CCluster::InitializeConfigCluster( void )
     int rankToPnid[worldSize];
     CClusterConfig *clusterConfig = Nodes->GetClusterConfig();
     
-    CurNodes = worldSize;
+    currentNodes_ = worldSize;
 
     if ( IsRealCluster )
     {
@@ -3038,10 +3038,10 @@ void CCluster::InitializeConfigCluster( void )
         int TmLeaderPNid = -1;
         if (IsMaster)
         {
-            TmLeaderNid = Nodes->GetFirstNid();
-            TmLeaderPNid = LNode[TmLeaderNid]->GetNode()->GetPNid();
+            tmLeaderNid_ = Nodes->GetFirstNid();
+            TmLeaderPNid = LNode[tmLeaderNid_]->GetNode()->GetPNid();
         }
-        // Non-master monitors in AGENT mode in a real cluster initialize all
+        // Monitor processes in AGENT mode in a real cluster initialize all
         // remote nodes to a down state. The master monitor and the joining
         // monitors will set the joining node state to up as part of the node
         // re-integration processing as monitor processes join the cluster
@@ -3179,8 +3179,8 @@ void CCluster::InitializeConfigCluster( void )
             delete [] commPortNums;
             delete [] syncPortNums;
 
-            TmLeaderNid = Nodes->GetFirstNid();
-            int TmLeaderPNid = LNode[TmLeaderNid]->GetNode()->GetPNid();
+            tmLeaderNid_ = Nodes->GetFirstNid();
+            int TmLeaderPNid = LNode[tmLeaderNid_]->GetNode()->GetPNid();
 
             // Any nodes not in the initial MPI_COMM_WORLD are down.
             for (int i=0; i<GetConfigPNodesCount(); ++i)
@@ -3220,7 +3220,7 @@ void CCluster::InitializeConfigCluster( void )
         }
         else
         {
-            TmLeaderNid = 0;
+            tmLeaderNid_ = 0;
         }
 
         // Initialize communicators for point-to-point communications
@@ -3828,7 +3828,7 @@ void CCluster::ReIntegrateMPI( int initProblem )
         {   // Already connected to creator monitor
             comms_[i] = intraCommCreatorMon;
             otherMonRank_[i] = 0;
-            ++CurNodes;
+            ++currentNodes_;
 
             // Set bit indicating node is up
             upNodes_.upNodes[i/MAX_NODE_BITMASK] |= (1ull << 
(i%MAX_NODE_BITMASK));
@@ -3922,7 +3922,7 @@ void CCluster::ReIntegrateMPI( int initProblem )
 
             comms_[i] = intraComm;
             otherMonRank_[i] = 0;
-            ++CurNodes;
+            ++currentNodes_;
             Node[i]->SetSyncPort( nodeInfo[i].syncPort );
             Node[i]->SetState( State_Up );
 
@@ -4163,7 +4163,7 @@ void CCluster::ReIntegrateSock( int initProblem )
             }
 
             otherMonRank_[nodeInfo[i].pnid] = 0;
-            ++CurNodes;
+            ++currentNodes_;
 
             // Store port numbers for the node
             strncpy(commPort, nodeInfo[i].commPort, MPI_MAX_PORT_NAME);
@@ -4310,7 +4310,7 @@ void CCluster::ReIntegrateSock( int initProblem )
             }
 
             otherMonRank_[nodeInfo[i].pnid] = 0;
-            ++CurNodes;
+            ++currentNodes_;
 
             // Store port numbers for the node
             strncpy(commPort, nodeInfo[i].commPort, MPI_MAX_PORT_NAME);
@@ -4583,7 +4583,7 @@ void CCluster::setNewComm( int pnid )
                     close( socks_[pnid] );
                     socks_[pnid] = -1;
                 }
-                --CurNodes;
+                --currentNodes_;
             }
 
             if (trace_settings & TRACE_RECOVERY)
@@ -4595,7 +4595,7 @@ void CCluster::setNewComm( int pnid )
 
             comms_[it->pnid] = it->comm;
             otherMonRank_[it->pnid] = it->otherRank;
-            ++CurNodes;
+            ++currentNodes_;
             // Set bit indicating node is up
             upNodes_.upNodes[it->pnid/MAX_NODE_BITMASK] |= (1ull << 
(it->pnid%MAX_NODE_BITMASK));
 
@@ -4686,14 +4686,14 @@ void CCluster::setNewSock( int pnid )
                 shutdown( socks_[pnid], SHUT_RDWR);
                 close( socks_[pnid] );
                 socks_[pnid] = -1;
-                --CurNodes;
+                --currentNodes_;
             }
 
             CNode *node= Nodes->GetNode( it->pnid );
             socks_[it->pnid] = it->socket;
             sockPorts_[it->pnid] = node->GetSyncSocketPort();
             otherMonRank_[it->pnid] = it->otherRank;
-            ++CurNodes;
+            ++currentNodes_;
 
             if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
             {
@@ -6040,7 +6040,7 @@ void CCluster::ValidateClusterState( cluster_state_def_t 
nodestate[],
             // Evaluate each active (up) node in the cluster
             int pnodesCount = 0;
             for (int index = 0;
-                 index < GetConfigPNodesMax() && pnodesCount < CurNodes;
+                 index < GetConfigPNodesMax() && pnodesCount < currentNodes_;
                  ++index)
             {
                 if ( nodestate[index].seq_num != 0 )
@@ -6104,11 +6104,11 @@ void CCluster::ValidateClusterState( 
cluster_state_def_t nodestate[],
 
             if (trace_settings & (TRACE_SYNC | TRACE_RECOVERY | TRACE_INIT))
             {
-                trace_printf("%s@%d concurringNodes=%d, CurNodes=%d\n",
-                             method_name, __LINE__, concurringNodes, CurNodes);
+                trace_printf("%s@%d concurringNodes=%d, currentNodes_=%d\n",
+                             method_name, __LINE__, concurringNodes, 
currentNodes_);
             }
 
-            if (concurringNodes == CurNodes)
+            if (concurringNodes == currentNodes_)
             {   // General agreement that node is down, proceed to mark it down
 
                 CNode *downNode = Nodes->GetNode( it->exitedPnid );
@@ -6149,7 +6149,7 @@ void CCluster::ValidateClusterState( cluster_state_def_t 
nodestate[],
                           "%d but only %d of %d nodes also lost the "
                           "connection.  See up: %s.  See down: %s.  So node "
                           "%d is going down (at seq #%lld).\n", method_name,
-                          it->exitedPnid, concurringNodes, CurNodes,
+                          it->exitedPnid, concurringNodes, currentNodes_,
                           setSeesUp.c_str(), setSeesDown.c_str(),
                           MyPNID, seqNum_ );
                 mon_log_write(MON_CLUSTER_VALIDATE_STATE_2, SQ_LOG_ERR, buf);
@@ -6198,7 +6198,7 @@ void CCluster::ValidateClusterState( cluster_state_def_t 
nodestate[],
 
         int pnodesCount2 = 0;
         for (int remIndex = 0;
-             remIndex < GetConfigPNodesMax() && pnodesCount2 < CurNodes;
+             remIndex < GetConfigPNodesMax() && pnodesCount2 < currentNodes_;
              ++remIndex)
         {
             bool someExited = false;
@@ -6248,7 +6248,7 @@ void CCluster::ValidateClusterState( cluster_state_def_t 
nodestate[],
             {  // This remote node sees node pnid as up
                 int pnodesCount3 = 0;
                 for (int exitedPNid = 0;
-                     exitedPNid < GetConfigPNodesMax() && pnodesCount3 < 
CurNodes;
+                     exitedPNid < GetConfigPNodesMax() && pnodesCount3 < 
currentNodes_;
                      ++exitedPNid)
                 {
                     CNode *exitedNode = Nodes->GetNode( 
/*indexToPnid_[remIndex]*/exitedPNid );
@@ -6666,7 +6666,7 @@ void CCluster::UpdateClusterState( bool &doShutdown,
                     abort();
             }
             Node[index]->SetState( State_Down );
-            --CurNodes;
+            --currentNodes_;
             // Clear bit in set of "up nodes"
             upNodes_.upNodes[index/MAX_NODE_BITMASK] &= ~(1ull << 
(index%MAX_NODE_BITMASK));
         }
@@ -6738,7 +6738,7 @@ void CCluster::UpdateClusterState( bool &doShutdown,
                         // Programmer bonehead!
                         abort();
                 }
-                --CurNodes;
+                --currentNodes_;
 
                 // Clear bit in set of "up nodes"
                 upNodes_.upNodes[index/MAX_NODE_BITMASK] &= ~(1ull << 
(index%MAX_NODE_BITMASK));
@@ -7068,14 +7068,14 @@ bool CCluster::checkIfDone (  )
 
     if (trace_settings & TRACE_SYNC_DETAIL)
         trace_printf("%s@%d - Node %d shutdown level=%d, state=%s.  Process "
-                     "count=%d, internal state=%d, CurNodes=%d, "
+                     "count=%d, internal state=%d, currentNodes_=%d, "
                      "local process count=%d\n",
                      method_name, __LINE__, MyNode->GetPNid(),
                      MyNode->GetShutdownLevel(),
                      StateString(MyNode->GetState()),
                      Nodes->ProcessCount(),
                      MyNode->getInternalState(),
-                     CurNodes, MyNode->GetNumProcs());
+                     currentNodes_, MyNode->GetNumProcs());
 
     // Check if we are also done
     if (( MyNode->GetState() != State_Down    ) &&
@@ -7094,7 +7094,7 @@ bool CCluster::checkIfDone (  )
                 return false;
             }
             else if ( (Nodes->ProcessCount() <=
-                      (CurNodes*MAX_PRIMITIVES))        // only WDGs alive
+                      (currentNodes_*MAX_PRIMITIVES))        // only WDGs alive
                       && !MyNode->isInQuiesceState()    // post-quiescing will
                                                         // expire WDG (cluster)
                       && !waitForWatchdogExit_ )        // WDG not yet exiting

http://git-wip-us.apache.org/repos/asf/trafodion/blob/3d7855b6/core/sqf/monitor/linux/cluster.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.h b/core/sqf/monitor/linux/cluster.h
index 6b658ae..ff49e56 100644
--- a/core/sqf/monitor/linux/cluster.h
+++ b/core/sqf/monitor/linux/cluster.h
@@ -124,12 +124,12 @@ public:
 
     void DoDeviceReq(char * ldevname);
     void ExpediteDown( void );
-    inline int  GetTmLeader( void ) { return( TmLeaderNid); }
-    inline void SetTmLeader( int tmLeaderNid ) { TmLeaderNid = tmLeaderNid; } 
-    inline int  GetMonitorLeader( void ) { return( MonitorLeaderPNid); }
-    inline void SetMonitorLeader( int monitorLeaderPNid ) { MonitorLeaderPNid 
= monitorLeaderPNid; } 
+    inline int  GetTmLeader( void ) { return( tmLeaderNid_ ); }
+    inline void SetTmLeader( int tmLeaderNid ) { tmLeaderNid_ = tmLeaderNid; } 
+    inline int  GetMonitorLeader( void ) { return( monitorLeaderPNid_); }
+    inline void SetMonitorLeader( int monitorLeaderPNid ) { monitorLeaderPNid_ 
= monitorLeaderPNid; } 
     int  GetDownedNid( void );
-    inline int GetTmSyncPNid( void ) { return( TmSyncPNid ); } // Physical 
Node ID of current TmSync operations master
+    inline int GetTmSyncPNid( void ) { return( tmSyncPNid_ ); } // Physical 
Node ID of current TmSync operations master
     void InitClusterComm(int worldSize, int myRank, int *rankToPnid);
     void addNewComm(int nid, int otherRank, MPI_Comm comm);
     void addNewSock(int nid, int otherRank, int sockFd );
@@ -210,7 +210,7 @@ protected:
 
     CNode  **Node;           // array of nodes
     CLNode **LNode;          // array of logical nodes
-    int      TmSyncPNid;     // Physical Node ID of current TmSync operations 
master
+    int      tmSyncPNid_;    // Physical Node ID of current TmSync operations 
master
 
 
     void AddTmsyncMsg( struct sync_buffer_def *tmSyncBuffer
@@ -229,15 +229,14 @@ protected:
     CLock syncCycle_;
 
 private:
-    int     CurNodes;       // Current # of nodes in the cluster
-    int     CurProcs;       // Current # if processes alive in MPI_COMM_WORLD
+    int     currentNodes_;      // Current # of nodes in the cluster
     int     configPNodesCount_; // # of physical nodes configured
     int     configPNodesMax_;   // max # of physical nodes that can be 
configured
-    int    *NodeMap;        // Mapping of Node ranks to COMM_WORLD ranks
-    int     TmLeaderNid;    // Nid of currently assigned TM Leader node
-    int     MonitorLeaderPNid; // PNid of currently assigned Monitor leader 
node
-    int     tmReadyCount_;  // # of DTM processes ready for transactions
-    size_t  minRecvCount_;  // minimum size of receive buffer for allgather
+    int    *nodeMap_;           // Mapping of Node ranks to COMM_WORLD ranks
+    int     tmLeaderNid_;       // Nid of currently assigned TM Leader node
+    int     monitorLeaderPNid_; // PNid of currently assigned Monitor leader 
node
+    int     tmReadyCount_;      // # of DTM processes ready for transactions
+    size_t  minRecvCount_;      // minimum size of receive buffer for allgather
 
     // Pointer to array of "sync_buffer_def" structures.  Used by
     // ShareWithPeers in "Allgather" operation.

http://git-wip-us.apache.org/repos/asf/trafodion/blob/3d7855b6/core/sqf/monitor/linux/pnode.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx
index 485d013..783640f 100644
--- a/core/sqf/monitor/linux/pnode.cxx
+++ b/core/sqf/monitor/linux/pnode.cxx
@@ -1709,7 +1709,6 @@ void CNodeContainer::AddNodes( )
         else
         {
             if (pnid >= maxNode) // only for workstation acting as single node
-//             || (IsAgentMode &&(strcmp( MasterMonitorName, Node_name ) != 
0)))   
             {
                 rank = -1; // -1 creates node in down state
             }

http://git-wip-us.apache.org/repos/asf/trafodion/blob/3d7855b6/core/sqf/monitor/linux/tmsync.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/tmsync.cxx b/core/sqf/monitor/linux/tmsync.cxx
index 60d9f40..b87f0f4 100644
--- a/core/sqf/monitor/linux/tmsync.cxx
+++ b/core/sqf/monitor/linux/tmsync.cxx
@@ -321,7 +321,7 @@ int CTmSync_Container::CoordinateTmDataBlock ( struct 
sync_def *sync )
                 exchangeTmSyncData( sync, false );
                 syncCycle_.unlock();
                 ExchangeTmSyncState( false );
-                if (( Monitor->TmSyncPNid == MyPNID                           
) &&
+                if (( Monitor->tmSyncPNid_ == MyPNID                           
) &&
                     ( Nodes->GetTmState( SyncState_Start ) == SyncState_Start 
)   )
                 {
                     // send unsolicited messages to other TMs in
@@ -353,7 +353,7 @@ int CTmSync_Container::CoordinateTmDataBlock ( struct 
sync_def *sync )
                 else
                 {
                     if (trace_settings & (TRACE_SYNC | TRACE_TMSYNC))
-                       trace_printf("%s@%d" " - Tm Sync failed to start, 
TmSyncPNid=%d, MyPNID=%d, " "TmSyncState=%d, expecting=%d\n", method_name, 
__LINE__, TmSyncPNid, MyPNID, Nodes->GetTmState( SyncState_Start ), 
SyncState_Start);
+                       trace_printf("%s@%d" " - Tm Sync failed to start, 
tmSyncPNid_=%d, MyPNID=%d, " "TmSyncState=%d, expecting=%d\n", method_name, 
__LINE__, tmSyncPNid_, MyPNID, Nodes->GetTmState( SyncState_Start ), 
SyncState_Start);
                     if (MyNode->GetTmSyncState() == SyncState_Start)
                     {
                         MyNode->SetTmSyncState( SyncState_Null );
@@ -449,7 +449,7 @@ void CTmSync_Container::EndTmSync( MSGTYPE type )
             {
                 trace_printf("%s@%d - Request (%p) nid=%d, handle=%d, tag=%d, 
unsol=%d, comp=%d\n", method_name, __LINE__, req, req->Nid, req->Handle, 
req->Tag, req->Unsolicited, req->Completed);
             }
-            if ( TmSyncPNid == MyPNID )
+            if ( tmSyncPNid_ == MyPNID )
             {
                 if ( MyNode->GetLNodesCount() > 1 )
                 {
@@ -666,7 +666,7 @@ void CTmSync_Container::ProcessTmSyncReply( struct 
message_def * msg )
             TmSyncReplyCode |= msg->u.reply.u.unsolicited_tm_sync.return_code;
             tmsync_req->Completed = true;
             UnsolicitedComplete( msg );
-            if ( TmSyncPNid == MyPNID )
+            if ( tmSyncPNid_ == MyPNID )
             {
                 if (trace_settings & (TRACE_REQUEST | TRACE_TMSYNC))
                     trace_printf("%s@%d - Local Unsolicited TmSync reply, 
handle="
@@ -1102,7 +1102,7 @@ bool CTmSync_Container::TmSyncPending( void )
        trace_printf("%s@%d" " - PendingTmSync=%d, total=%d, replies=%d, 
pending=%d\n", method_name, __LINE__, PendingSlaveTmSync, 
GetTotalSlaveTmSyncCount(), GetTmSyncReplies(), GetPendingSlaveTmSyncCount() );
 
     if (( MyNode->GetTmSyncState() == SyncState_Abort ) &&
-        ( TmSyncPNid != MyPNID ) &&
+        ( tmSyncPNid_ != MyPNID ) &&
         ( GetTmSyncReplies() == GetTotalSlaveTmSyncCount() )   )
     {
         CommitTmDataBlock( MPI_ERR_UNKNOWN );

http://git-wip-us.apache.org/repos/asf/trafodion/blob/3d7855b6/core/sqf/monitor/linux/zclient.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/zclient.cxx b/core/sqf/monitor/linux/zclient.cxx
index 107cf32..1c133ca 100644
--- a/core/sqf/monitor/linux/zclient.cxx
+++ b/core/sqf/monitor/linux/zclient.cxx
@@ -523,6 +523,8 @@ const char* CZClient::WaitForAndReturnMaster( bool doWait )
             {
                 break;
             } 
+            usleep(1000000); // sleep for a second as to not overwhelm the 
system   
+            retries++;
             continue;
         }
         else if ( rc == ZOK )
@@ -549,14 +551,14 @@ const char* CZClient::WaitForAndReturnMaster( bool doWait 
)
                     break;
                 }
                 usleep(1000000); // sleep for a second as to not overwhelm the 
system   
-               retries++;
+                retries++;
                 continue;
             }
         }
          
         else  // error
         { 
-           if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+            if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
             {
                 trace_printf( "%s@%d Error (MasterMonitor) 
WaitForAndReturnMaster returned rc (%d), retries %d\n"
                         , method_name, __LINE__, rc, retries );
@@ -946,6 +948,8 @@ int CZClient::CreateMasterZNode(  const char *nodeName )
                 , "[%s], RegisterZNode(%s) failed with error %s\n"
                 , method_name, monData.c_str(), zerror(rc) );
         mon_log_write(MON_ZCLIENT_CREATEMASTERZNODE, SQ_LOG_ERR, buf);
+
+        TRACE_EXIT;
         return(rc); // Return the error
     }
     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))

Reply via email to