Fixes for problems during shutdown when the Name Server is enabled.
Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/db656603 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/db656603 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/db656603 Branch: refs/heads/master Commit: db656603237af4f376a2cca27fe0500b7461380a Parents: a528474 Author: Zalo Correa <[email protected]> Authored: Wed Apr 18 11:41:21 2018 -0700 Committer: Zalo Correa <[email protected]> Committed: Wed Apr 18 11:41:21 2018 -0700 ---------------------------------------------------------------------- core/sqf/monitor/linux/cluster.cxx | 214 ++++++++++++++++++--- core/sqf/monitor/linux/cluster.h | 8 +- core/sqf/monitor/linux/nameserver.cxx | 2 +- core/sqf/monitor/linux/nscommacceptmon.cxx | 5 +- core/sqf/monitor/linux/nscommacceptmon.h | 2 + core/sqf/monitor/linux/nsreqprocinfons.cxx | 160 +++++++++++----- core/sqf/monitor/linux/nsreqshutdown.cxx | 18 +- core/sqf/monitor/linux/pnode.cxx | 239 +++++++++++++++++++++++- core/sqf/monitor/linux/pnode.h | 16 ++ core/sqf/monitor/linux/process.cxx | 24 +++ core/sqf/monitor/linux/reqtmleader.cxx | 36 ++++ 11 files changed, 635 insertions(+), 89 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/cluster.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx index e2f7dbf..4646433 100644 --- a/core/sqf/monitor/linux/cluster.cxx +++ b/core/sqf/monitor/linux/cluster.cxx @@ -66,6 +66,8 @@ using namespace std; #include "meas.h" #ifdef NAMESERVER_PROCESS #include "nscommacceptmon.h" +#else +#include "nameserver.h" #endif extern bool IAmIntegrating; @@ -85,6 +87,7 @@ extern char MySyncPort[MPI_MAX_PORT_NAME]; extern CCommAcceptMon CommAcceptMon; extern char MyMon2NsPort[MPI_MAX_PORT_NAME]; #else +extern CNameServer 
*NameServer; extern bool NameServerEnabled; extern char MyPtPPort[MPI_MAX_PORT_NAME]; #endif @@ -120,6 +123,8 @@ extern CReplicate Replicator; extern char *ErrorMsg (int error_code); +extern const char *ProcessTypeString( PROCESSTYPE type ); + const char *JoiningPhaseString( JOINING_PHASE phase); const char *StateString( STATE state); #ifndef NAMESERVER_PROCESS @@ -538,6 +543,13 @@ void CCluster::AssignTmLeader( int pnid, bool checkProcess ) int TmLeaderPNid = LNode[tmLeaderNid_]->GetNode()->GetPNid(); + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + trace_printf( "%s@%d - pnid=%d, checkProcess=%d, tmLeaderNid_=%d, TmLeaderPNid=%d\n" + , method_name, __LINE__ + , pnid, checkProcess, tmLeaderNid_, TmLeaderPNid ); + } + if (TmLeaderPNid != pnid) { node = LNode[tmLeaderNid_]->GetNode(); @@ -561,6 +573,36 @@ void CCluster::AssignTmLeader( int pnid, bool checkProcess ) } return; } + else + { + if (NameServerEnabled) + { + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + trace_printf( "%s@%d - Getting process from Name Server, nid=%d, type%s\n" + , method_name, __LINE__ + , tmLeaderNid_, ProcessTypeString(ProcessType_DTM) ); + } + + process = Nodes->GetProcessLByTypeNs( tmLeaderNid_, ProcessType_DTM ); + if (process) + { + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + if (node) + trace_printf( "%s@%d - Node pnid=%d (%s), phase=%s, " + "isSoftNodeDown=%d, checkProcess=%d\n" + , method_name, __LINE__ + , node->GetPNid() + , node->GetName() + , NodePhaseString(node->GetPhase()) + , node->IsSoftNodeDown() + , checkProcess ); + } + return; + } + } + } } else { @@ -698,6 +740,7 @@ CCluster::CCluster (void) reconnectSeqNum_(0), seqNum_(1), waitForWatchdogExit_(false) + ,waitForNameServerExit_(false) ,checkSeqNum_(false) ,validateNodeDown_(false) ,enqueuedDown_(false) @@ -1455,8 +1498,8 @@ int CCluster::HardNodeUp( 
int pnid, char *node_name ) TRACE_ENTRY; if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY)) - trace_printf( "%s@%d - pnid=%d, name=%s (MyPNID = %d)\n" - , method_name, __LINE__, pnid, node_name, MyPNID ); + trace_printf( "%s@%d - pnid=%d, name=%s (MyPNID = %d), currentNodes_=%d\n" + , method_name, __LINE__, pnid, node_name, MyPNID, currentNodes_ ); if ( pnid == -1 ) { @@ -3170,13 +3213,19 @@ void CCluster::InitializeConfigCluster( void ) char *nodes = getenv( "SQ_VIRTUAL_NODES" ); worldSize = atoi(nodes); if ( worldSize <= 0 ) + { worldSize = 1; + } } #endif int rankToPnid[worldSize]; CClusterConfig *clusterConfig = Nodes->GetClusterConfig(); +#ifdef NAMESERVER_PROCESS + currentNodes_ = 1; // non-master Name Servers join set through master Name Server +#else currentNodes_ = worldSize; +#endif if ( IsRealCluster ) { @@ -7361,7 +7410,38 @@ bool CCluster::checkIfDone ( ) const char method_name[] = "CCluster::checkIfDone"; TRACE_ENTRY; - if (trace_settings & TRACE_SYNC_DETAIL) +#ifdef NAMESERVER_PROCESS + int nameServerCount = 0; + CClusterConfig *clusterConfig = Nodes->GetClusterConfig(); + CNameServerConfigContainer *nameServerConfig = NULL; + + if (clusterConfig) + { + nameServerConfig = Nodes->GetNameServerConfig(); + if (nameServerConfig) + { + nameServerCount = nameServerConfig->GetCount(); + } + } + + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) + trace_printf("%s@%d - Node %d shutdown level=%d, state=%s. 
Process " + "count=%d, internal state=%d, currentNodes_=%d, " + "local process count=%d, shutdownNameServer=%d, " + "nameServerCount=%d\n", + method_name, __LINE__, + MyNode->GetPNid(), + MyNode->GetShutdownLevel(), + StateString(MyNode->GetState()), + Nodes->ProcessCount(), + MyNode->getInternalState(), + currentNodes_, + MyNode->GetNumProcs(), + MyNode->IsShutdownNameServer(), + nameServerCount ); + +#else + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) trace_printf("%s@%d - Node %d shutdown level=%d, state=%s. Process " "count=%d, internal state=%d, currentNodes_=%d, " "local process count=%d\n", @@ -7372,45 +7452,117 @@ bool CCluster::checkIfDone ( ) MyNode->getInternalState(), currentNodes_, MyNode->GetNumProcs()); +#endif // Check if we are also done if (( MyNode->GetState() != State_Down ) && ( MyNode->GetState() != State_Stopped ) ) { if ( MyNode->GetShutdownLevel() != ShutdownLevel_Undefined ) { - if ( Nodes->ProcessCount() == 0 ) // all WDTs exited - { - if (trace_settings & TRACE_SYNC) - trace_printf("%s@%d - Monitor signaled to exit.\n", method_name, __LINE__); +#ifdef NAMESERVER_PROCESS + if ( (Nodes->ProcessCount() <= nameServerCount ) // only Name Servers alive + && (MyNode->GetNumProcs() <= MAX_PRIMITIVES ) // only My Name Server alive + && MyNode->IsShutdownNameServer() // monitor shutdown Name Server received + && !MyNode->isInQuiesceState() ) // post-quiescing will + // expire WDG (cluster) + { + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) + trace_printf("%s@%d - Name Server signaled to exit.\n", method_name, __LINE__); MyNode->SetState( State_Stopped ); MyNode->SetInternalState(State_Ready_To_Exit); // we need to sync one more time so other nodes see our state return false; } - else if ( (Nodes->ProcessCount() <= - (currentNodes_*MAX_PRIMITIVES)) // only WDGs alive - && !MyNode->isInQuiesceState() // post-quiescing will - // expire WDG (cluster) - && !waitForWatchdogExit_ ) // WDG not yet 
exiting +#else + if ( NameServerEnabled ) { - if (trace_settings & TRACE_SYNC) - trace_printf("%s@%d - Stopping watchdog process.\n", - method_name, __LINE__); - - waitForWatchdogExit_ = true; - // stop the watchdog timer first - HealthCheck.setState(MON_STOP_WATCHDOG); - // let the watchdog process exit - HealthCheck.setState(MON_EXIT_PRIMITIVES); + + if ( Nodes->ProcessCount() == 0 ) // all Name Servers exited + { + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) + trace_printf("%s@%d - Monitor signaled to exit.\n", method_name, __LINE__); + MyNode->SetState( State_Stopped ); + MyNode->SetInternalState(State_Ready_To_Exit); + + // we need to sync one more time so other nodes see our state + return false; + } + else if ( (Nodes->ProcessCount() <= + (currentNodes_ * (MAX_PRIMITIVES+1)) ) // only WDGs and Name Servers alive + && (MyNode->GetNumProcs() <= + (MAX_PRIMITIVES+1) ) // only WDGs and Name Servers alive + && !MyNode->isInQuiesceState() // post-quiescing will + // expire WDG (cluster) + && !waitForWatchdogExit_ ) // WDG not yet exiting + { + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) + trace_printf("%s@%d - Stopping watchdog process. " + "(process count: cluster=%d, MyNode=%d)\n", + method_name, __LINE__, + Nodes->ProcessCount(), MyNode->ProcessCount()); + + waitForWatchdogExit_ = true; + // stop the watchdog timer first + HealthCheck.setState(MON_STOP_WATCHDOG); + // let the watchdog process exit + HealthCheck.setState(MON_EXIT_PRIMITIVES); + } + else if ( (Nodes->ProcessCount() <= + (currentNodes_ * (MAX_PRIMITIVES)) ) // only Name Servers alive + && (MyNode->GetNumProcs() <= + (MAX_PRIMITIVES) ) // only Name Servers alive + && !MyNode->isInQuiesceState() // post-quiescing will + // expire WDG (cluster) + && !waitForNameServerExit_ ) // Name Server not yet exiting + { + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) + trace_printf("%s@%d - Stopping Name Server process. 
" + "(process count: cluster=%d, MyNode=%d)\n", + method_name, __LINE__, + Nodes->ProcessCount(), MyNode->ProcessCount()); + + waitForNameServerExit_ = true; + NameServer->ProcessShutdown(); + } + } + else + { + if ( Nodes->ProcessCount() == 0 ) // all WDTs exited + { + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) + trace_printf("%s@%d - Monitor signaled to exit.\n", method_name, __LINE__); + MyNode->SetState( State_Stopped ); + MyNode->SetInternalState(State_Ready_To_Exit); + + // we need to sync one more time so other nodes see our state + return false; + } + else if ( (Nodes->ProcessCount() <= + (currentNodes_*MAX_PRIMITIVES)) // only WDGs alive + && !MyNode->isInQuiesceState() // post-quiescing will + // expire WDG (cluster) + && !waitForWatchdogExit_ ) // WDG not yet exiting + { + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) + trace_printf("%s@%d - Stopping watchdog process.\n", + method_name, __LINE__); + + waitForWatchdogExit_ = true; + // stop the watchdog timer first + HealthCheck.setState(MON_STOP_WATCHDOG); + // let the watchdog process exit + HealthCheck.setState(MON_EXIT_PRIMITIVES); + } } +#endif } } else if ( MyNode->GetShutdownLevel() != ShutdownLevel_Undefined && MyNode->GetState() == State_Down && MyNode->GetNumProcs() == 0) { - if (trace_settings & TRACE_SYNC) + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) trace_printf("%s@%d - No processes remaining, monitor exiting.\n", method_name, __LINE__); @@ -8483,13 +8635,15 @@ void CCluster::InitServerSock( void ) , (int)((unsigned char *)addr)[3] , mon2nsPort ); MyNode->SetMon2NsPort( MyMon2NsPort ); + MyNode->SetMon2NsSocketPort( mon2nsPort ); if (trace_settings & (TRACE_INIT | TRACE_RECOVERY)) trace_printf( "%s@%d Initialized my mon2ns comm socket port, " - "pnid=%d (%s:%s) (Mon2NsCommPort=%s)\n" + "pnid=%d (%s:%s) (Mon2NsPort=%s, Mon2NsSocketPort=%d)\n" , method_name, __LINE__ , MyPNID, MyNode->GetName(), 
MyMon2NsPort - , MyNode->GetMon2NsPort() ); + , MyNode->GetMon2NsPort() + , MyNode->GetMon2NsSocketPort() ); } #else @@ -8883,7 +9037,17 @@ int CCluster::Connect( const char *portName ) return ( sock ); } -#ifndef NAMESERVER_PROCESS +#ifdef NAMESERVER_PROCESS +void CCluster::ConnectToMon2NsCommSelf( void ) +{ + const char method_name[] = "CCluster::ConnectToMon2NsCommSelf"; + TRACE_ENTRY; + + Connect( MyNode->GetMon2NsSocketPort() ); + + TRACE_EXIT; +} +#else void CCluster::ConnectToPtPCommSelf( void ) { const char method_name[] = "CCluster::ConnectToPtPCommSelf"; http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/cluster.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/cluster.h b/core/sqf/monitor/linux/cluster.h index f4f9147..49939ac 100644 --- a/core/sqf/monitor/linux/cluster.h +++ b/core/sqf/monitor/linux/cluster.h @@ -113,9 +113,14 @@ public: #endif int Connect( const char *portName ); void Connect( int socketPort ); -#ifndef NAMESERVER_PROCESS +#ifdef NAMESERVER_PROCESS + void ConnectToMon2NsCommSelf( void ); +#else void ConnectToPtPCommSelf( void ); #endif +#ifdef NAMESERVER_PROCESS + void ConnectToMonCommSelf( void ); +#endif void ConnectToSelf( void ); int SetKeepAliveSockOpt( int sock ); int MkCltSock( const char *portName ); @@ -318,6 +323,7 @@ private: int cumulativeDelaySec_; bool waitForWatchdogExit_; // set when watchdog exit has already been issued + bool waitForNameServerExit_; // set when Name Server exit has already been issued typedef struct state_def { http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/nameserver.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nameserver.cxx b/core/sqf/monitor/linux/nameserver.cxx index ef67562..d367085 100644 --- a/core/sqf/monitor/linux/nameserver.cxx +++ b/core/sqf/monitor/linux/nameserver.cxx @@ -629,7 +629,7 @@ int 
CNameServer::ProcessShutdown( void ) msgshutdown->nid = -1; msgshutdown->pid = -1; //msgshutdown->level = msgIn->u.request.u.shutdown.level; - msgshutdown->level = ShutdownLevel_Abrupt; + msgshutdown->level = ShutdownLevel_Normal; int error = SendReceive(&msg ); http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/nscommacceptmon.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nscommacceptmon.cxx b/core/sqf/monitor/linux/nscommacceptmon.cxx index cc91eba..6282b0a 100644 --- a/core/sqf/monitor/linux/nscommacceptmon.cxx +++ b/core/sqf/monitor/linux/nscommacceptmon.cxx @@ -53,6 +53,7 @@ CCommAcceptMon::CCommAcceptMon() : accepting_(false) , shutdown_(false) , thread_id_(0) + , process_thread_id_(0) { const char method_name[] = "CCommAcceptMon::CCommAcceptMon"; TRACE_ENTRY; @@ -668,7 +669,7 @@ void CCommAcceptMon::processNewSock( int joinFd ) Context *ctx = new Context(); ctx->this_ = this; ctx->pendingFd_ = joinFd; - rc = pthread_create(&thread_id_, NULL, mon2nsProcess, ctx); + rc = pthread_create(&process_thread_id_, NULL, mon2nsProcess, ctx); if (rc != 0) { char buf[MON_STRING_BUF_SIZE]; @@ -777,7 +778,7 @@ void CCommAcceptMon::shutdownWork(void) // Set flag that tells the commAcceptor thread to exit shutdown_ = true; - Monitor->ConnectToSelf(); + Monitor->ConnectToMon2NsCommSelf(); CLock::wakeOne(); if ( trace_settings & ( TRACE_NS ) ) http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/nscommacceptmon.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nscommacceptmon.h b/core/sqf/monitor/linux/nscommacceptmon.h index 30daa92..41b2c9b 100644 --- a/core/sqf/monitor/linux/nscommacceptmon.h +++ b/core/sqf/monitor/linux/nscommacceptmon.h @@ -70,6 +70,8 @@ private: // commAccept thread's id pthread_t thread_id_; + // commAccept thread's id + pthread_t process_thread_id_; enum { 
HEURISTIC_COUNT = 10 }; }; http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/nsreqprocinfons.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nsreqprocinfons.cxx b/core/sqf/monitor/linux/nsreqprocinfons.cxx index 3be8ddb..dbbf80c 100644 --- a/core/sqf/monitor/linux/nsreqprocinfons.cxx +++ b/core/sqf/monitor/linux/nsreqprocinfons.cxx @@ -39,6 +39,8 @@ extern CNodeContainer *Nodes; extern CReplicate Replicator; extern int MyPNID; +extern const char *ProcessTypeString( PROCESSTYPE type ); + CExtProcInfoNsReq::CExtProcInfoNsReq( reqQueueMsg_t msgType, int nid, int pid, int sockFd, struct message_def *msg ) @@ -55,48 +57,110 @@ CExtProcInfoNsReq::~CExtProcInfoNsReq() } // Copy information for a specific process into the reply message buffer. -void CExtProcInfoNsReq::copyInfo(CProcess *process, ProcessInfoNs_reply_def &processInfo) +void CExtProcInfoNsReq::copyInfo(CProcess *process, ProcessInfoNs_reply_def &process_info_ns) { - CProcess *parent; + const char method_name[] = "CNameServer::SendReceive"; + TRACE_ENTRY; - processInfo.nid = process->GetNid(); - processInfo.pid = process->GetPid(); - processInfo.verifier = process->GetVerifier(); - strncpy( processInfo.process_name, process->GetName(), MAX_PROCESS_NAME ); - processInfo.type = process->GetType(); + CProcess *parent; - parent = process->GetParent(); + process_info_ns.nid = process->GetNid(); + process_info_ns.pid = process->GetPid(); + process_info_ns.verifier = process->GetVerifier(); + strncpy( process_info_ns.process_name, process->GetName(), MAX_PROCESS_NAME ); + process_info_ns.type = process->GetType(); + parent = (process->GetParentNid() == -1 ? 
+ NULL : + Nodes->GetLNode(process->GetParentNid()) + ->GetProcessL(process->GetParentPid())); if (parent) { - processInfo.parent_nid = parent->GetNid(); - processInfo.parent_pid = parent->GetPid(); - processInfo.parent_verifier = parent->GetVerifier(); -// strncpy(processInfo.parent_name, parent->GetName(), MAX_PROCESS_NAME ); + process_info_ns.parent_nid = parent->GetNid(); + process_info_ns.parent_pid = parent->GetPid(); + process_info_ns.parent_verifier = parent->GetVerifier(); } else { - processInfo.parent_nid = -1; - processInfo.parent_pid = -1; - processInfo.parent_verifier = -1; -// processInfo.parent_name[0] = '\0'; + process_info_ns.parent_nid = -1; + process_info_ns.parent_pid = -1; + process_info_ns.parent_verifier = -1; } - processInfo.priority = process->GetPriority(); - processInfo.backup = process->IsBackup(); - processInfo.state = process->GetState(); - processInfo.unhooked = process->IsUnhooked(); - processInfo.event_messages = process->IsEventMessages(); - processInfo.system_messages = process->IsSystemMessages(); - strncpy( processInfo.program, process->program(), MAX_PROCESS_PATH ); - processInfo.pathStrId = process->pathStrId(); - processInfo.ldpathStrId = process->ldPathStrId(); - processInfo.programStrId = process->programStrId(); - strncpy( processInfo.port_name, process->GetPort(), MPI_MAX_PORT_NAME ); - processInfo.argc = process->argc(); - memcpy( processInfo.argv, process->userArgv(), process->userArgvLen() ); - strncpy( processInfo.infile, process->infile(), MAX_PROCESS_PATH ); - strncpy( processInfo.outfile, process->outfile(), MAX_PROCESS_PATH ); - processInfo.creation_time = process->GetCreationTime(); + process_info_ns.priority = process->GetPriority(); + process_info_ns.backup = process->IsBackup(); + process_info_ns.state = process->GetState(); + process_info_ns.unhooked = process->IsUnhooked(); + process_info_ns.event_messages = process->IsEventMessages(); + process_info_ns.system_messages = process->IsSystemMessages(); + 
strncpy( process_info_ns.program, process->program(), MAX_PROCESS_PATH ); + process_info_ns.pathStrId = process->pathStrId(); + process_info_ns.ldpathStrId = process->ldPathStrId(); + process_info_ns.programStrId = process->programStrId(); + strncpy( process_info_ns.port_name, process->GetPort(), MPI_MAX_PORT_NAME ); + process_info_ns.argc = process->argc(); + memcpy( process_info_ns.argv, process->userArgv(), process->userArgvLen() ); + strncpy( process_info_ns.infile, process->infile(), MAX_PROCESS_PATH ); + strncpy( process_info_ns.outfile, process->outfile(), MAX_PROCESS_PATH ); + process_info_ns.creation_time = process->GetCreationTime(); + if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) + { + char desc[2048]; + char* descp = desc; + sprintf( desc, + "process-info-ns reply:\n" + " process_info_ns.nid=%d\n" + " process_info_ns.pid=%d\n" + " process_info_ns.verifier=%d\n" + " process_info_ns.process_name=%s\n" + " process_info_ns.type=%d\n" + " process_info_ns.parent_nid=%d\n" + " process_info_ns.parent_pid=%d\n" + " process_info_ns.parent_verifier=%d\n" + " process_info_ns.priority=%d\n" + " process_info_ns.backup=%d\n" + " process_info_ns.state=%d\n" + " process_info_ns.unhooked=%d\n" + " process_info_ns.event_messages=%d\n" + " process_info_ns.system_messages=%d\n" + " process_info_ns.program=%s\n" + " process_info_ns.pathStrId=%d:%d\n" + " process_info_ns.ldpathStrId=%d:%d\n" + " process_info_ns.programStrId=%d:%d\n" + " process_info_ns.port_name=%s\n" + " process_info_ns.argc=%d\n" + " process_info_ns.infile=%s\n" + " process_info_ns.outfile=%s\n" + " process_info_ns.return_code=%d" + , process_info_ns.nid + , process_info_ns.pid + , process_info_ns.verifier + , process_info_ns.process_name + , process_info_ns.type + , process_info_ns.parent_nid + , process_info_ns.parent_pid + , process_info_ns.parent_verifier + , process_info_ns.priority + , process_info_ns.backup + , process_info_ns.state + , process_info_ns.unhooked + , 
process_info_ns.event_messages + , process_info_ns.system_messages + , process_info_ns.program + , process_info_ns.pathStrId.nid + , process_info_ns.pathStrId.id + , process_info_ns.ldpathStrId.nid + , process_info_ns.ldpathStrId.id + , process_info_ns.programStrId.nid + , process_info_ns.programStrId.id + , process_info_ns.port_name + , process_info_ns.argc + , process_info_ns.infile + , process_info_ns.outfile + , process_info_ns.return_code ); + trace_printf( "%s@%d - %s\n" + , method_name, __LINE__, descp ); + } + TRACE_EXIT; } void CExtProcInfoNsReq::populateRequestString( void ) @@ -148,30 +212,42 @@ void CExtProcInfoNsReq::performRequest() target_process_name = (const char *) msg_->u.request.u.process_info.target_process_name; target_verifier = msg_->u.request.u.process_info.target_verifier; + PROCESSTYPE target_type = msg_->u.request.u.process_info.type; + if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) { trace_printf( "%s@%d request #%ld: ProcessInfoNs, for (%d, %d:%d), " - "process type=%d\n" + "process type=%s\n" , method_name, __LINE__, id_ , target_nid, target_pid, target_verifier - , msg_->u.request.u.process_info.type); + , ProcessTypeString(target_type)); } - if ( target_process_name.size() ) + if (target_process_name.size()) { // find by name (don't check node state, don't check process state, not backup) process = Nodes->GetProcess( target_process_name.c_str() , target_verifier , false, false, false ); } else - { // find by nid (don't check node state, don't check process state, backup is Ok) - process = Nodes->GetProcess( target_nid - , target_pid - , target_verifier - , false, false, true ); + { + if (target_pid != -1) + { // find by nid,pid (don't check node state, don't check process state, backup is Ok) + process = Nodes->GetProcess( target_nid + , target_pid + , target_verifier + , false, false, true ); + } + else + { + CLNode *lnode = Nodes->GetLNode( target_nid ); + if (lnode) + { + process = lnode->GetProcessLByType( target_type ); 
+ } + } } - if (process) { msg_->u.reply.type = ReplyType_ProcessInfoNs; @@ -182,7 +258,7 @@ void CExtProcInfoNsReq::performRequest() { if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) { - trace_printf( "%s@%d - Kill %s (%d, %d:%d) -- can't find target process\n" + trace_printf( "%s@%d - ProcessInfoNs %s (%d, %d:%d) -- can't find target process\n" , method_name, __LINE__ , msg_->u.request.u.process_info.target_process_name , msg_->u.request.u.process_info.target_nid http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/nsreqshutdown.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nsreqshutdown.cxx b/core/sqf/monitor/linux/nsreqshutdown.cxx index 0fd6896..e5888d2 100644 --- a/core/sqf/monitor/linux/nsreqshutdown.cxx +++ b/core/sqf/monitor/linux/nsreqshutdown.cxx @@ -84,22 +84,12 @@ void CExtShutdownNsReq::performRequest() msg_->u.request.u.shutdown.level); } - if ( msg_->u.request.u.shutdown.level == ShutdownLevel_Abrupt ) + if (( MyNode->GetState() != State_Down ) && + ( MyNode->GetState() != State_Stopped ) ) { - // Replicate a shutdown request so that all nodes begin to shutdown locally. - CReplShutdown *repl = new CReplShutdown(msg_->u.request.u.shutdown.level); - Replicator.addItem(repl); - } - else - { - // normal shutdown - // propagate the shutdown level before killing any processes. 
+ MyNode->SetShutdownNameServer( true ); MyNode->SetShutdownLevel( msg_->u.request.u.shutdown.level ); - - if (MyNode->GetState() == State_Up) - { - MyNode->SetState( State_Shutdown ); - } + MyNode->SetState( State_Shutdown ); } msg_->u.reply.u.generic.return_code = MPI_SUCCESS; http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/pnode.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx index b5fa4ed..033a19a 100644 --- a/core/sqf/monitor/linux/pnode.cxx +++ b/core/sqf/monitor/linux/pnode.cxx @@ -169,15 +169,22 @@ CNode::CNode( char *name, int pnid, int rank ) ,tmSyncState_(SyncState_Null) #endif ,shutdownLevel_(ShutdownLevel_Undefined) + ,shutdownNameServer_(false) ,wdtKeepAliveTimerValue_(WDT_KeepAliveTimerDefault) ,zid_(pnid) + ,commPort_("") + ,syncPort_("") #ifdef NAMESERVER_PROCESS + ,mon2NsPort_("") + ,mon2NsSocketPort_(-1) ,monConnCount_(0) #else + ,ptpPort_("") ,ptpSocketPort_(-1) #endif ,commSocketPort_(-1) ,syncSocketPort_(-1) + ,uniqStrId_(-1) ,procStatFile_(NULL) ,procMeminfoFile_(-1) { @@ -302,8 +309,22 @@ CNode::CNode( char *name ,tmSyncNid_(-1) ,tmSyncState_(SyncState_Suspended) ,shutdownLevel_(ShutdownLevel_Undefined) + ,shutdownNameServer_(false) ,wdtKeepAliveTimerValue_(WDT_KeepAliveTimerDefault) ,zid_(-1) + ,commPort_("") + ,syncPort_("") +#ifdef NAMESERVER_PROCESS + ,mon2NsPort_("") + ,mon2NsSocketPort_(-1) + ,monConnCount_(-1) +#else + ,ptpPort_("") + ,ptpSocketPort_(-1) +#endif + ,commSocketPort_(-1) + ,syncSocketPort_(-1) + ,uniqStrId_(-1) ,procStatFile_(NULL) ,procMeminfoFile_(-1) { @@ -560,8 +581,6 @@ void CNode::CheckShutdownProcessing( void ) sprintf(buf, "Broadcasting shutdown notice, level = %d\n", shutdownLevel_); mon_log_write(MON_NODE_SHUTDOWN_1, SQ_LOG_WARNING, buf); Bcast (msg); - if ( NameServerEnabled ) - NameServer->ProcessShutdown(); delete msg; } #endif @@ -2458,6 +2477,20 @@ void 
CNodeContainer::CancelDeathNotification( int nid } #endif +void CNodeContainer::DeleteCloneProcess( CProcess *process ) +{ + const char method_name[] = "CNodeContainer::DeleteCloneProcess"; + TRACE_ENTRY; + + CNode *node; + node = Nodes->GetLNode(process->GetNid())->GetNode(); + node->DelFromNameMap ( process ); + node->DelFromPidMap ( process ); + node->DeleteFromList( process ); + + TRACE_EXIT; +} + void CNodeContainer::DeletedNode( CNode *node ) { const char method_name[] = "CNodeContainer::DeletedNode"; @@ -3009,6 +3042,200 @@ CProcess *CNodeContainer::GetProcessByName( const char *name, bool checkstate ) } #ifndef NAMESERVER_PROCESS +int CNodeContainer::GetProcessInfoNs( int nid + , int pid + , Verifier_t verifier + , ProcessInfoNs_reply_def *processInfo ) +{ + const char method_name[] = "CNodeContainer::GetProcessInfoNs"; + TRACE_ENTRY; + + int rc = MPI_SUCCESS; + + struct message_def msg; + msg.type = MsgType_Service; + msg.noreply = false; + msg.reply_tag = REPLY_TAG; + msg.u.request.type = ReqType_ProcessInfoNs; + + struct ProcessInfo_def *process_info = &msg.u.request.u.process_info; + process_info->nid = -1; + process_info->pid = -1; + process_info->verifier = -1; + process_info->process_name[0] = 0; + process_info->target_nid = nid; + process_info->target_pid = pid; + process_info->target_verifier = verifier; + process_info->target_process_name[0] = 0; + process_info->target_process_pattern[0] = 0; + process_info->type = ProcessType_Undefined; + + int error = NameServer->ProcessInfoNs(&msg); // in reqQueue thread (CExternalReq) + if (error == 0) + { + if ( (msg.type == MsgType_Service) && + (msg.u.reply.type == ReplyType_ProcessInfoNs) ) + { + if ( msg.u.reply.u.process_info_ns.return_code == MPI_SUCCESS ) + { + *processInfo = msg.u.reply.u.process_info_ns; + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s] ProcessInfo failed, rc=%d\n" + , method_name, msg.u.reply.u.process_info_ns.return_code ); + mon_log_write( 
MON_NODE_GETPROCESSNS_1, SQ_LOG_ERR, buf ); + } + rc = msg.u.reply.u.process_info_ns.return_code; + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s], Invalid MsgType(%d)/ReplyType(%d) for " + "ProcessInfoNs\n" + , method_name, msg.type, msg.u.reply.type ); + mon_log_write( MON_NODE_GETPROCESSNS_2, SQ_LOG_ERR, buf ); + rc = MPI_ERR_OP; + } + } + else + { + rc = MPI_ERR_OP; + } + + TRACE_EXIT; + return( rc ); +} + +int CNodeContainer::GetProcessInfoNs( const char *name + , Verifier_t verifier + , ProcessInfoNs_reply_def *processInfo ) +{ + const char method_name[] = "CNodeContainer::GetProcessInfoNs"; + TRACE_ENTRY; + + int rc = MPI_SUCCESS; + + struct message_def msg; + msg.type = MsgType_Service; + msg.noreply = false; + msg.reply_tag = REPLY_TAG; + msg.u.request.type = ReqType_ProcessInfoNs; + + struct ProcessInfo_def *process_info = &msg.u.request.u.process_info; + process_info->nid = -1; + process_info->pid = -1; + process_info->verifier = -1; + process_info->process_name[0] = 0; + process_info->target_nid = -1; + process_info->target_pid = -1; + process_info->target_verifier = verifier; + STRCPY( process_info->target_process_name, name); + process_info->target_process_pattern[0] = 0; + process_info->type = ProcessType_Undefined; + + int error = NameServer->ProcessInfoNs(&msg); // in reqQueue thread (CExternalReq) + if (error == 0) + { + if ( (msg.type == MsgType_Service) && + (msg.u.reply.type == ReplyType_ProcessInfoNs) ) + { + if ( msg.u.reply.u.process_info_ns.return_code == MPI_SUCCESS ) + { + *processInfo = msg.u.reply.u.process_info_ns; + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s] ProcessInfo failed, rc=%d\n" + , method_name, msg.u.reply.u.process_info_ns.return_code ); + mon_log_write( MON_NODE_GETPROCESSNS_3, SQ_LOG_ERR, buf ); + } + rc = msg.u.reply.u.process_info_ns.return_code; + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s], Invalid 
MsgType(%d)/ReplyType(%d) for " + "ProcessInfo\n" + , method_name, msg.type, msg.u.reply.type ); + mon_log_write( MON_NODE_GETPROCESSNS_4, SQ_LOG_ERR, buf ); + rc = MPI_ERR_OP; + } + } + else + { + rc = MPI_ERR_OP; + } + + TRACE_EXIT; + return( rc ); +} + +CProcess *CNodeContainer::GetProcessLByTypeNs( int nid, PROCESSTYPE type ) +{ + const char method_name[] = "CNodeContainer::GetProcessLByTypeNs"; + TRACE_ENTRY; + + CProcess *process = NULL; + + struct message_def msg; + msg.type = MsgType_Service; + msg.noreply = false; + msg.reply_tag = REPLY_TAG; + msg.u.request.type = ReqType_ProcessInfoNs; + + struct ProcessInfo_def *processInfo = &msg.u.request.u.process_info; + processInfo->nid = -1; + processInfo->pid = -1; + processInfo->verifier = -1; + processInfo->process_name[0] = 0; + processInfo->target_nid = nid; + processInfo->target_pid = -1; + processInfo->target_verifier = -1; + processInfo->target_process_name[0] = 0; + processInfo->target_process_pattern[0] = 0; + processInfo->type = type; + + int error = NameServer->ProcessInfoNs(&msg); // in reqQueue thread (CExternalReq) + if (error == 0) + { + if ( (msg.type == MsgType_Service) && + (msg.u.reply.type == ReplyType_ProcessInfoNs) ) + { + if ( msg.u.reply.u.process_info_ns.return_code == MPI_SUCCESS ) + { + process = AddCloneProcess( &msg.u.reply.u.process_info_ns ); + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s] ProcessInfo failed, rc=%d\n" + , method_name, msg.u.reply.u.process_info_ns.return_code ); + mon_log_write( MON_NODE_GETPROCESSNS_3, SQ_LOG_ERR, buf ); + } + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s], Invalid MsgType(%d)/ReplyType(%d) for " + "ProcessInfo\n" + , method_name, msg.type, msg.u.reply.type ); + mon_log_write( MON_NODE_GETPROCESSNS_4, SQ_LOG_ERR, buf ); + } + } + + TRACE_EXIT; + return( process ); +} + CProcess *CNodeContainer::GetProcessNs( int nid , int pid , Verifier_t verifier ) @@ -3019,11 +3246,11 @@ 
CProcess *CNodeContainer::GetProcessNs( int nid CProcess *process = NULL; struct message_def msg; - memset(&msg, 0, sizeof(msg) ); // TODO: remove! msg.type = MsgType_Service; msg.noreply = false; msg.reply_tag = REPLY_TAG; msg.u.request.type = ReqType_ProcessInfoNs; + struct ProcessInfo_def *processInfo = &msg.u.request.u.process_info; processInfo->nid = -1; processInfo->pid = -1; @@ -3033,6 +3260,8 @@ CProcess *CNodeContainer::GetProcessNs( int nid processInfo->target_pid = pid; processInfo->target_verifier = verifier; processInfo->target_process_name[0] = 0; + processInfo->target_process_pattern[0] = 0; + processInfo->type = ProcessType_Undefined; int error = NameServer->ProcessInfoNs(&msg); // in reqQueue thread (CExternalReq) if (error == 0) @@ -3076,11 +3305,11 @@ CProcess *CNodeContainer::GetProcessNs( const char *name, Verifier_t verifier ) CProcess *process = NULL; struct message_def msg; - memset(&msg, 0, sizeof(msg) ); // TODO: remove! msg.type = MsgType_Service; msg.noreply = false; msg.reply_tag = REPLY_TAG; msg.u.request.type = ReqType_ProcessInfoNs; + struct ProcessInfo_def *processInfo = &msg.u.request.u.process_info; processInfo->nid = -1; processInfo->pid = -1; @@ -3090,6 +3319,8 @@ CProcess *CNodeContainer::GetProcessNs( const char *name, Verifier_t verifier ) processInfo->target_pid = -1; processInfo->target_verifier = verifier; STRCPY( processInfo->target_process_name, name); + processInfo->target_process_pattern[0] = 0; + processInfo->type = ProcessType_Undefined; int error = NameServer->ProcessInfoNs(&msg); // in reqQueue thread (CExternalReq) if (error == 0) http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/pnode.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/pnode.h b/core/sqf/monitor/linux/pnode.h index b13fbef..44008ea 100644 --- a/core/sqf/monitor/linux/pnode.h +++ b/core/sqf/monitor/linux/pnode.h @@ -76,6 +76,7 @@ public: , int verifier , 
_TM_Txid_External trans_id ); void ChangedNode( CNode *node ); + void DeleteCloneProcess( CProcess *process ); void DeletedNode( CNode *node ); bool DeleteNode( int pnid ); void DeleteNode( CNode *node ); @@ -112,6 +113,15 @@ public: , bool checkstate=true , bool backupOk=false ); CProcess *GetProcessByName( const char *name, bool checkstate=true ); + int GetProcessInfoNs( int nid + , int pid + , Verifier_t verifier + , ProcessInfoNs_reply_def *processInfo ); + int GetProcessInfoNs( const char *name + , Verifier_t verifier + , ProcessInfoNs_reply_def *processInfo ); + CProcess *GetProcessLByTypeNs( int nid + , PROCESSTYPE type ); CProcess *GetProcessNs( int nid , int pid , Verifier_t verifier ); @@ -243,6 +253,7 @@ public: inline const char *GetSyncPort( void ) { return syncPort_.c_str(); } #ifdef NAMESERVER_PROCESS inline const char *GetMon2NsPort( void ) { return mon2NsPort_.c_str(); } + inline int GetMon2NsSocketPort( void ) { return( mon2NsSocketPort_ ); } inline int GetMonConnCount( void ) { return monConnCount_; } #else inline const char *GetPtPPort( void ) { return ptpPort_.c_str(); } @@ -271,6 +282,7 @@ public: inline bool IsSpareNode( void ) { return( spareNode_ ); } inline bool IsSoftNodeDown( void ) { return( internalState_ == State_SoftDown ); } inline bool IsSoftNodeUp( void ) { return( internalState_ == State_SoftUp ); } + inline bool IsShutdownNameServer( void ) { return( shutdownNameServer_ ); } CNode *Link( CNode *entry ); void MoveLNodes( CNode *targetNode ); @@ -318,6 +330,7 @@ public: inline void SetSyncPort( char *syncPort) { syncPort_ = syncPort; } #ifdef NAMESERVER_PROCESS inline void SetMon2NsPort( char *mon2NsPort) { mon2NsPort_ = mon2NsPort; } + inline void SetMon2NsSocketPort( int mon2NsSocketPort) { mon2NsSocketPort_ = mon2NsSocketPort; } #else inline void SetPtPPort( char *ptpPort) { ptpPort_ = ptpPort; } inline void SetPtPSocketPort( int ptpSocketPort) { ptpSocketPort_ = ptpSocketPort; } @@ -326,6 +339,7 @@ public: inline void 
SetCommSocketPort( int commSocketPort) { commSocketPort_ = commSocketPort; } inline void SetSyncSocketPort( int syncSocketPort) { syncSocketPort_ = syncSocketPort; } inline void SetSpareNode( void ) { spareNode_ = true; } + inline void SetShutdownNameServer( bool shutdown ) { shutdownNameServer_ = shutdown; } inline void SetShutdownLevel( ShutdownLevel level ) { shutdownLevel_ = level; } void SetState( STATE state ); inline void SetTmSyncNid( int nid ) { tmSyncNid_ = nid; } @@ -403,6 +417,7 @@ private: int tmSyncNid_; // Logical Node of TM that initiated sync SyncState tmSyncState_; // Sync operation state with TMs ShutdownLevel shutdownLevel_; + bool shutdownNameServer_; // true when monitor shutdown Name Server request is received int wdtKeepAliveTimerValue_; // expiration time struct timeval todStart_; // time of last watchdog reset @@ -417,6 +432,7 @@ private: string syncPort_; // monitor socket allgather port #ifdef NAMESERVER_PROCESS string mon2NsPort_; // monitor to ns port + int mon2NsSocketPort_; // monitor to ns socket port int monConnCount_; // monitor connections #else string ptpPort_; http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/process.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/process.cxx b/core/sqf/monitor/linux/process.cxx index fce323c..5ecaf50 100644 --- a/core/sqf/monitor/linux/process.cxx +++ b/core/sqf/monitor/linux/process.cxx @@ -2939,6 +2939,30 @@ void CProcess::Exit( CProcess *parent ) SetState(State_Stopped); + if (parent && NameServerEnabled) + { + ProcessInfoNs_reply_def processInfo; + int rc = Nodes->GetProcessInfoNs( parent->GetNid() + , parent->GetPid() + , parent->GetVerifier() + , &processInfo); + if (rc == MPI_ERR_NAME) + { + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + trace_printf( "%s@%d - Deleting clone process %s, (%d,%d:%d)\n" + , method_name, __LINE__ + , 
parent->GetName() + , parent->GetNid() + , parent->GetPid() + , parent->GetVerifier() ); + } + Nodes->DeleteCloneProcess( parent ); + parent = NULL; + } + + } + // if the env is set to not deliver death messages upon node down, // check the state of the process' node. bool supplyProcessDeathNotices = true; http://git-wip-us.apache.org/repos/asf/trafodion/blob/db656603/core/sqf/monitor/linux/reqtmleader.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/reqtmleader.cxx b/core/sqf/monitor/linux/reqtmleader.cxx index 9cfe6ea..9d4fb96 100644 --- a/core/sqf/monitor/linux/reqtmleader.cxx +++ b/core/sqf/monitor/linux/reqtmleader.cxx @@ -28,11 +28,13 @@ #include "montrace.h" #include "monsonar.h" #include "monlogging.h" +#include "nameserver.h" extern CMonStats *MonStats; extern CNode *MyNode; extern CNodeContainer *Nodes; extern CMonitor *Monitor; +extern bool NameServerEnabled; CExtTmLeaderReq::CExtTmLeaderReq (reqQueueMsg_t msgType, int pid, struct message_def *msg ) @@ -117,11 +119,27 @@ void CExtTmLeaderReq::performRequest() Monitor->ExitSyncCycle(); } + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + trace_printf( "%s@%d - tmLeaderNid=%d\n" + , method_name, __LINE__, tmLeaderNid ); + } + if ( MyNode->GetShutdownLevel() == ShutdownLevel_Undefined ) { CProcess *process; process = Nodes->GetLNode(tmLeaderNid)->GetProcessLByType( ProcessType_DTM ); + if (!process && NameServerEnabled) + { + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + trace_printf( "%s@%d - Getting process from Name Server, nid=%d, type=ProcessType_DTM\n" + , method_name, __LINE__, tmLeaderNid ); + } + + process = Nodes->GetProcessLByTypeNs( tmLeaderNid, ProcessType_DTM ); + } if (!process) { @@ -150,6 +168,24 @@ void CExtTmLeaderReq::performRequest() msg_->u.reply.u.generic.pid = process->GetPid(); msg_->u.reply.u.generic.verifier = 
process->GetVerifier(); strcpy (msg_->u.reply.u.generic.process_name, process->GetName()); + + if (process && NameServerEnabled) + { + if (!MyNode->IsMyNode( process->GetNid() )) + { + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + trace_printf( "%s@%d - Deleting clone process %s, (%d,%d:%d)\n" + , method_name, __LINE__ + , process->GetName() + , process->GetNid() + , process->GetPid() + , process->GetVerifier() ); + } + Nodes->DeleteCloneProcess( process ); + } + + } } else {
