Fixes to external requests when the Name Server is enabled and the target process is on a remote node.
Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/731fd2d1 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/731fd2d1 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/731fd2d1 Branch: refs/heads/master Commit: 731fd2d1dc6aa8cbce40bc92a0370d5eed315645 Parents: 1f7eabb Author: Zalo Correa <[email protected]> Authored: Thu May 17 17:05:11 2018 -0700 Committer: Zalo Correa <[email protected]> Committed: Thu May 17 17:05:11 2018 -0700 ---------------------------------------------------------------------- core/sqf/monitor/linux/config.cxx | 20 ++- core/sqf/monitor/linux/pnode.cxx | 246 +++++++++++++++--------------- core/sqf/monitor/linux/pnode.h | 10 +- core/sqf/monitor/linux/reqdump.cxx | 88 +++++++++-- core/sqf/monitor/linux/reqevent.cxx | 121 ++++++++++----- core/sqf/monitor/linux/reqkill.cxx | 107 ++++++++++--- core/sqf/monitor/linux/reqnotify.cxx | 27 +--- core/sqf/monitor/linux/reqopen.cxx | 10 +- core/sqf/monitor/linux/reqqueue.cxx | 18 +-- 9 files changed, 414 insertions(+), 233 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/731fd2d1/core/sqf/monitor/linux/config.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/config.cxx b/core/sqf/monitor/linux/config.cxx index db08956..c757e25 100644 --- a/core/sqf/monitor/linux/config.cxx +++ b/core/sqf/monitor/linux/config.cxx @@ -246,7 +246,7 @@ void CConfigGroup::NormalizeName (string &name) void CConfigGroup::SendChangeNotification (CConfigKey *key) { - CProcess *process; + CProcess *targetProcess; struct message_def *msg; const char method_name[] = "CConfigGroup::SendChangeNotification"; @@ -357,16 +357,16 @@ void CConfigGroup::SendChangeNotification (CConfigKey *key) case ConfigType_Process: // check if we need to associate a process - Nodes->GetLNode ((char 
*)name_.c_str(), &process); - if ( process ) + Nodes->GetLNode ((char *)name_.c_str(), &targetProcess); + if ( targetProcess ) { - if ( process->IsSystemMessages() && !process->IsClone() ) + if ( targetProcess->IsSystemMessages() && !targetProcess->IsClone() ) { if (trace_settings & (TRACE_SYNC | TRACE_REQUEST | TRACE_INIT)) - trace_printf("%s@%d - Sending Configuration Change message to %s\n", method_name, __LINE__, process->GetName()); + trace_printf("%s@%d - Sending Configuration Change message to %s\n", method_name, __LINE__, targetProcess->GetName()); #ifndef NAMESERVER_PROCESS - SQ_theLocalIOToClient->putOnNoticeQueue( process->GetPid() - , process->GetVerifier() + SQ_theLocalIOToClient->putOnNoticeQueue( targetProcess->GetPid() + , targetProcess->GetVerifier() , msg , NULL); #endif @@ -374,7 +374,11 @@ void CConfigGroup::SendChangeNotification (CConfigKey *key) else { if (trace_settings & (TRACE_SYNC | TRACE_REQUEST | TRACE_INIT)) - trace_printf("%s@%d - Not sending Configuration Change message to %s, system_messages=%d, isClone=%d\n", method_name, __LINE__, process->GetName(), process->IsSystemMessages(), process->IsClone()); + trace_printf("%s@%d - Not sending Configuration Change message to %s, system_messages=%d, isClone=%d\n" + , method_name, __LINE__ + , targetProcess->GetName() + , targetProcess->IsSystemMessages() + , targetProcess->IsClone()); delete msg; } http://git-wip-us.apache.org/repos/asf/trafodion/blob/731fd2d1/core/sqf/monitor/linux/pnode.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx index 0884343..c044100 100644 --- a/core/sqf/monitor/linux/pnode.cxx +++ b/core/sqf/monitor/linux/pnode.cxx @@ -2491,6 +2491,131 @@ void CNodeContainer::CancelDeathNotification( int nid } #endif +#ifndef NAMESERVER_PROCESS +CProcess *CNodeContainer::CloneProcessNs( int nid + , int pid + , Verifier_t verifier ) +{ + const char method_name[] = 
"CNodeContainer::CloneProcessNs"; + TRACE_ENTRY; + + CProcess *process = NULL; + + struct message_def msg; + msg.type = MsgType_Service; + msg.noreply = false; + msg.reply_tag = REPLY_TAG; + msg.u.request.type = ReqType_ProcessInfoNs; + + struct ProcessInfo_def *processInfo = &msg.u.request.u.process_info; + processInfo->nid = -1; + processInfo->pid = -1; + processInfo->verifier = -1; + processInfo->process_name[0] = 0; + processInfo->target_nid = nid; + processInfo->target_pid = pid; + processInfo->target_verifier = verifier; + processInfo->target_process_name[0] = 0; + processInfo->target_process_pattern[0] = 0; + processInfo->type = ProcessType_Undefined; + + int error = NameServer->ProcessInfoNs(&msg); // in reqQueue thread (CExternalReq) + if (error == 0) + { + if ( (msg.type == MsgType_Service) && + (msg.u.reply.type == ReplyType_ProcessInfoNs) ) + { + if ( msg.u.reply.u.process_info_ns.return_code == MPI_SUCCESS ) + { + process = AddCloneProcess( &msg.u.reply.u.process_info_ns ); + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s] ProcessInfo failed, rc=%d\n" + , method_name, msg.u.reply.u.process_info_ns.return_code ); + mon_log_write( MON_NODE_GETPROCESSNS_1, SQ_LOG_ERR, buf ); + } + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s], Invalid MsgType(%d)/ReplyType(%d) for " + "ProcessInfoNs\n" + , method_name, msg.type, msg.u.reply.type ); + mon_log_write( MON_NODE_GETPROCESSNS_2, SQ_LOG_ERR, buf ); + } + } + + TRACE_EXIT; + return( process ); +} +#endif + +#ifndef NAMESERVER_PROCESS +CProcess *CNodeContainer::CloneProcessNs( const char *name, Verifier_t verifier ) +{ + const char method_name[] = "CNodeContainer::CloneProcessNs"; + TRACE_ENTRY; + + CProcess *process = NULL; + + struct message_def msg; + msg.type = MsgType_Service; + msg.noreply = false; + msg.reply_tag = REPLY_TAG; + msg.u.request.type = ReqType_ProcessInfoNs; + + struct ProcessInfo_def *processInfo = 
&msg.u.request.u.process_info; + processInfo->nid = -1; + processInfo->pid = -1; + processInfo->verifier = -1; + processInfo->process_name[0] = 0; + processInfo->target_nid = -1; + processInfo->target_pid = -1; + processInfo->target_verifier = verifier; + STRCPY( processInfo->target_process_name, name); + processInfo->target_process_pattern[0] = 0; + processInfo->type = ProcessType_Undefined; + + int error = NameServer->ProcessInfoNs(&msg); // in reqQueue thread (CExternalReq) + if (error == 0) + { + if ( (msg.type == MsgType_Service) && + (msg.u.reply.type == ReplyType_ProcessInfoNs) ) + { + if ( msg.u.reply.u.process_info_ns.return_code == MPI_SUCCESS ) + { + process = AddCloneProcess( &msg.u.reply.u.process_info_ns ); + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s] ProcessInfo failed, rc=%d\n" + , method_name, msg.u.reply.u.process_info_ns.return_code ); + mon_log_write( MON_NODE_GETPROCESSNS_3, SQ_LOG_ERR, buf ); + } + } + else + { + char buf[MON_STRING_BUF_SIZE]; + snprintf( buf, sizeof(buf), + "[%s], Invalid MsgType(%d)/ReplyType(%d) for " + "ProcessInfo\n" + , method_name, msg.type, msg.u.reply.type ); + mon_log_write( MON_NODE_GETPROCESSNS_4, SQ_LOG_ERR, buf ); + } + } + + TRACE_EXIT; + return( process ); +} +#endif + +#ifndef NAMESERVER_PROCESS void CNodeContainer::DeleteCloneProcess( CProcess *process ) { const char method_name[] = "CNodeContainer::DeleteCloneProcess"; @@ -2504,6 +2629,7 @@ void CNodeContainer::DeleteCloneProcess( CProcess *process ) TRACE_EXIT; } +#endif void CNodeContainer::DeletedNode( CNode *node ) { @@ -3275,126 +3401,6 @@ CProcess *CNodeContainer::GetProcessLByTypeNs( int nid, PROCESSTYPE type ) return( process ); } -CProcess *CNodeContainer::GetProcessNs( int nid - , int pid - , Verifier_t verifier ) -{ - const char method_name[] = "CNodeContainer::GetProcessNs"; - TRACE_ENTRY; - - CProcess *process = NULL; - - struct message_def msg; - msg.type = MsgType_Service; - msg.noreply = false; - 
msg.reply_tag = REPLY_TAG; - msg.u.request.type = ReqType_ProcessInfoNs; - - struct ProcessInfo_def *processInfo = &msg.u.request.u.process_info; - processInfo->nid = -1; - processInfo->pid = -1; - processInfo->verifier = -1; - processInfo->process_name[0] = 0; - processInfo->target_nid = nid; - processInfo->target_pid = pid; - processInfo->target_verifier = verifier; - processInfo->target_process_name[0] = 0; - processInfo->target_process_pattern[0] = 0; - processInfo->type = ProcessType_Undefined; - - int error = NameServer->ProcessInfoNs(&msg); // in reqQueue thread (CExternalReq) - if (error == 0) - { - if ( (msg.type == MsgType_Service) && - (msg.u.reply.type == ReplyType_ProcessInfoNs) ) - { - if ( msg.u.reply.u.process_info_ns.return_code == MPI_SUCCESS ) - { - process = AddCloneProcess( &msg.u.reply.u.process_info_ns ); - } - else - { - char buf[MON_STRING_BUF_SIZE]; - snprintf( buf, sizeof(buf), - "[%s] ProcessInfo failed, rc=%d\n" - , method_name, msg.u.reply.u.process_info_ns.return_code ); - mon_log_write( MON_NODE_GETPROCESSNS_1, SQ_LOG_ERR, buf ); - } - } - else - { - char buf[MON_STRING_BUF_SIZE]; - snprintf( buf, sizeof(buf), - "[%s], Invalid MsgType(%d)/ReplyType(%d) for " - "ProcessInfoNs\n" - , method_name, msg.type, msg.u.reply.type ); - mon_log_write( MON_NODE_GETPROCESSNS_2, SQ_LOG_ERR, buf ); - } - } - - TRACE_EXIT; - return( process ); -} - -CProcess *CNodeContainer::GetProcessNs( const char *name, Verifier_t verifier ) -{ - const char method_name[] = "CNodeContainer::GetProcessNs"; - TRACE_ENTRY; - - CProcess *process = NULL; - - struct message_def msg; - msg.type = MsgType_Service; - msg.noreply = false; - msg.reply_tag = REPLY_TAG; - msg.u.request.type = ReqType_ProcessInfoNs; - - struct ProcessInfo_def *processInfo = &msg.u.request.u.process_info; - processInfo->nid = -1; - processInfo->pid = -1; - processInfo->verifier = -1; - processInfo->process_name[0] = 0; - processInfo->target_nid = -1; - processInfo->target_pid = -1; - 
processInfo->target_verifier = verifier; - STRCPY( processInfo->target_process_name, name); - processInfo->target_process_pattern[0] = 0; - processInfo->type = ProcessType_Undefined; - - int error = NameServer->ProcessInfoNs(&msg); // in reqQueue thread (CExternalReq) - if (error == 0) - { - if ( (msg.type == MsgType_Service) && - (msg.u.reply.type == ReplyType_ProcessInfoNs) ) - { - if ( msg.u.reply.u.process_info_ns.return_code == MPI_SUCCESS ) - { - process = AddCloneProcess( &msg.u.reply.u.process_info_ns ); - } - else - { - char buf[MON_STRING_BUF_SIZE]; - snprintf( buf, sizeof(buf), - "[%s] ProcessInfo failed, rc=%d\n" - , method_name, msg.u.reply.u.process_info_ns.return_code ); - mon_log_write( MON_NODE_GETPROCESSNS_3, SQ_LOG_ERR, buf ); - } - } - else - { - char buf[MON_STRING_BUF_SIZE]; - snprintf( buf, sizeof(buf), - "[%s], Invalid MsgType(%d)/ReplyType(%d) for " - "ProcessInfo\n" - , method_name, msg.type, msg.u.reply.type ); - mon_log_write( MON_NODE_GETPROCESSNS_4, SQ_LOG_ERR, buf ); - } - } - - TRACE_EXIT; - return( process ); -} - SyncState CNodeContainer::GetTmState ( SyncState check_state ) { SyncState state = check_state; http://git-wip-us.apache.org/repos/asf/trafodion/blob/731fd2d1/core/sqf/monitor/linux/pnode.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/pnode.h b/core/sqf/monitor/linux/pnode.h index dafee70..fbfddf4 100644 --- a/core/sqf/monitor/linux/pnode.h +++ b/core/sqf/monitor/linux/pnode.h @@ -78,6 +78,11 @@ public: , int verifier , _TM_Txid_External trans_id ); void ChangedNode( CNode *node ); + CProcess *CloneProcessNs( int nid + , int pid + , Verifier_t verifier ); + CProcess *CloneProcessNs( const char *name + , Verifier_t verifier ); void DeleteCloneProcess( CProcess *process ); void DeletedNode( CNode *node ); bool DeleteNode( int pnid ); @@ -124,11 +129,6 @@ public: , ProcessInfoNs_reply_def *processInfo ); CProcess *GetProcessLByTypeNs( int nid , PROCESSTYPE type ); 
- CProcess *GetProcessNs( int nid - , int pid - , Verifier_t verifier ); - CProcess *GetProcessNs( const char *name - , Verifier_t verifier ); SyncState GetTmState( SyncState check_state ); CNode *GetZoneNode( int zid ); http://git-wip-us.apache.org/repos/asf/trafodion/blob/731fd2d1/core/sqf/monitor/linux/reqdump.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/reqdump.cxx b/core/sqf/monitor/linux/reqdump.cxx index 4e56bb5..5d2dd5e 100644 --- a/core/sqf/monitor/linux/reqdump.cxx +++ b/core/sqf/monitor/linux/reqdump.cxx @@ -32,6 +32,7 @@ extern CMonStats *MonStats; extern CNode *MyNode; extern CNodeContainer *Nodes; +extern bool NameServerEnabled; CExtDumpReq::CExtDumpReq (reqQueueMsg_t msgType, int pid, struct message_def *msg ) @@ -73,8 +74,9 @@ void CExtDumpReq::performRequest() const char method_name[] = "CExtDumpReq::performRequest"; TRACE_ENTRY; - CProcess *target; - CProcess *requester; + CProcess *cloneProcess = NULL; + CProcess *targetProcess = NULL; + CProcess *requester = NULL; CLNode *lnode; string target_process_name; int target_nid = -1; @@ -127,28 +129,76 @@ void CExtDumpReq::performRequest() { if ( target_process_name.size() ) { // find by name - target = Nodes->GetProcess( target_process_name.c_str() - , target_verifier ); + targetProcess = Nodes->GetProcess( target_process_name.c_str() + , target_verifier ); } else { // find by nid, pid - target = Nodes->GetProcess( target_nid - , target_pid - , target_verifier ); + targetProcess = Nodes->GetProcess( target_nid + , target_pid + , target_verifier ); } - if ( target ) + if ( !targetProcess ) + { + if (NameServerEnabled) + { + if ( target_process_name.size() ) + { // Name Server find by name:verifier + if (trace_settings & TRACE_REQUEST) + { + trace_printf( "%s@%d" " - Getting targetProcess from Name Server (%s:%d)" "\n" + , method_name, __LINE__ + , target_process_name.c_str() + , target_verifier ); + } + cloneProcess = 
Nodes->CloneProcessNs( target_process_name.c_str() + , target_verifier ); + targetProcess = cloneProcess; + } + else + { // Name Server find by nid,pid:verifier + if (trace_settings & TRACE_REQUEST) + { + trace_printf( "%s@%d" " - Getting targetProcess from Name Server (%d,%d:%d)\n" + , method_name, __LINE__ + , target_nid + , target_pid + , target_verifier ); + } + cloneProcess = Nodes->CloneProcessNs( target_nid + , target_pid + , target_verifier ); + targetProcess = cloneProcess; + } + if (targetProcess) + { + if (trace_settings & TRACE_REQUEST) + trace_printf( "%s@%d - Found targetProcess %s (%d,%d:%d), clone=%d\n" + , method_name, __LINE__ + , targetProcess->GetName() + , targetProcess->GetNid() + , targetProcess->GetPid() + , targetProcess->GetVerifier() + , targetProcess->IsClone() ); + } + } + } + + if ( targetProcess ) { if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) trace_printf("%s@%d - Dump Process name=%s, nid=%d, pid=%d\n", - method_name, __LINE__, target->GetName(), - target->GetNid(), target->GetPid()); - target->parentContext(msg_); + method_name, __LINE__, targetProcess->GetName(), + targetProcess->GetNid(), targetProcess->GetPid()); + targetProcess->parentContext(msg_); lnode = Nodes->GetLNode(target_nid); if (lnode->Dump_Process(requester, - target, + targetProcess, msg_->u.request.u.dump.path) != SUCCESS) + { rc = MPI_ERR_SPAWN; + } } else { @@ -163,6 +213,20 @@ void CExtDumpReq::performRequest() // Otherwise the dump has been initiated and reply will be sent // upon completion. 
+ if (NameServerEnabled && cloneProcess) + { + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + trace_printf( "%s@%d - Deleting clone process %s, (%d,%d:%d)\n" + , method_name, __LINE__ + , cloneProcess->GetName() + , cloneProcess->GetNid() + , cloneProcess->GetPid() + , cloneProcess->GetVerifier() ); + } + Nodes->DeleteCloneProcess( cloneProcess ); + } + if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) trace_printf("%s@%d - Unsuccessful rc=%d\n", method_name, __LINE__, rc); // build reply http://git-wip-us.apache.org/repos/asf/trafodion/blob/731fd2d1/core/sqf/monitor/linux/reqevent.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/reqevent.cxx b/core/sqf/monitor/linux/reqevent.cxx index 422c861..01c9067 100644 --- a/core/sqf/monitor/linux/reqevent.cxx +++ b/core/sqf/monitor/linux/reqevent.cxx @@ -32,6 +32,7 @@ extern CMonStats *MonStats; extern CNode *MyNode; extern CNodeContainer *Nodes; +extern bool NameServerEnabled; CExtEventReq::CExtEventReq (reqQueueMsg_t msgType, int pid, struct message_def *msg ) @@ -85,7 +86,8 @@ void CExtEventReq::performRequest() int pid; int num_procs; PROCESSTYPE type; - CProcess *process = NULL; + CProcess *cloneProcess = NULL; + CProcess *targetProcess = NULL; CProcess *requester; // Record statistics (sonar counters) @@ -134,7 +136,7 @@ void CExtEventReq::performRequest() // Only monitor can send events to SQWatchdog process if ( type != ProcessType_Watchdog ) - { // Only monitor can send events to SQWatchdog process + { if (msg_->u.request.u.event.target_nid == -1) { if (trace_settings & TRACE_REQUEST) @@ -161,50 +163,84 @@ void CExtEventReq::performRequest() if ( target_process_name.size() ) { // find by name - process = Nodes->GetProcess( target_process_name.c_str() - , target_verifier ); - if ( process && trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) + targetProcess = Nodes->GetProcess( 
target_process_name.c_str() + , target_verifier ); + if ( !targetProcess ) + { + if (NameServerEnabled) + { // Name Server find by name:verifier + if (trace_settings & TRACE_REQUEST) + { + trace_printf( "%s@%d" " - Getting targetProcess from Name Server (%s:%d)" "\n" + , method_name, __LINE__ + , target_process_name.c_str() + , target_verifier ); + } + cloneProcess = Nodes->CloneProcessNs( target_process_name.c_str() + , target_verifier ); + targetProcess = cloneProcess; + } + } + if ( targetProcess && trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) { trace_printf( "%s@%d - Found target by name %s (%d, %d:%d)\n" , method_name, __LINE__ - , process->GetName() - , process->GetNid() - , process->GetPid() - , process->GetVerifier()); + , targetProcess->GetName() + , targetProcess->GetNid() + , targetProcess->GetPid() + , targetProcess->GetVerifier()); } - pid = process ? process->GetPid() : -1; + pid = targetProcess ? targetProcess->GetPid() : -1; } else if (pid == -1) { // get info for all processes in node - process = Nodes->GetLNode(nid)->GetFirstProcess(); - if ( process && trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) + targetProcess = Nodes->GetLNode(nid)->GetFirstProcess(); + if ( targetProcess && trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) { trace_printf( "%s@%d - Found target first process %s (%d, %d:%d)\n" , method_name, __LINE__ - , process->GetName() - , process->GetNid() - , process->GetPid() - , process->GetVerifier()); + , targetProcess->GetName() + , targetProcess->GetNid() + , targetProcess->GetPid() + , targetProcess->GetVerifier()); } } else { // get info for single process in node by pid - process = Nodes->GetProcess( target_nid - , pid - , target_verifier ); - if ( process && trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) + targetProcess = Nodes->GetProcess( target_nid + , pid + , target_verifier ); + if ( !targetProcess ) + { + if (NameServerEnabled) + { // Name Server find by nid,pid:verifier + if (trace_settings & TRACE_REQUEST) + 
{ + trace_printf( "%s@%d" " - Getting targetProcess from Name Server (%d,%d:%d)\n" + , method_name, __LINE__ + , target_nid + , pid + , target_verifier ); + } + cloneProcess = Nodes->CloneProcessNs( target_nid + , pid + , target_verifier ); + targetProcess = cloneProcess; + } + } + if ( targetProcess && trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) { trace_printf( "%s@%d - Found target by nid,pid %s (%d, %d:%d)\n" , method_name, __LINE__ - , process->GetName() - , process->GetNid() - , process->GetPid() - , process->GetVerifier()); + , targetProcess->GetName() + , targetProcess->GetNid() + , targetProcess->GetPid() + , targetProcess->GetVerifier()); } } } - if ( !process && target_nid != -1 && + if ( !targetProcess && target_nid != -1 && (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))) trace_printf("%s@%d" " - Target process not found! %s (%d, %d:%d)\n" , method_name, __LINE__ @@ -220,36 +256,49 @@ void CExtEventReq::performRequest() { if (target_nid == -1) { - process = lnode->GetFirstProcess(); + targetProcess = lnode->GetFirstProcess(); } - while (process && num_procs < MAX_PROC_LIST) + while (targetProcess && num_procs < MAX_PROC_LIST) { - if (pid == -1 || process->GetPid() == pid) + if (pid == -1 || targetProcess->GetPid() == pid) { - if ( process->GetType() != ProcessType_Watchdog ) + if ( targetProcess->GetType() != ProcessType_Watchdog ) { - if (type == ProcessType_Undefined || type == process->GetType()) + if (type == ProcessType_Undefined || type == targetProcess->GetType()) { - process->GenerateEvent( msg_->u.request.u.event.event_id, + targetProcess->GenerateEvent( msg_->u.request.u.event.event_id, msg_->u.request.u.event.length, msg_->u.request.u.event.data ); if (trace_settings & TRACE_REQUEST) trace_printf( "%s@%d - Event %d sent to %s (%d, %d:%d)\n" , method_name, __LINE__ , msg_->u.request.u.event.event_id - , process->GetName() - , process->GetNid() - , process->GetPid() - , process->GetVerifier()); + , targetProcess->GetName() + , 
targetProcess->GetNid() + , targetProcess->GetPid() + , targetProcess->GetVerifier()); rc = MPI_SUCCESS; } } } else { + if (NameServerEnabled && cloneProcess) + { + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + trace_printf( "%s@%d - Deleting clone process %s, (%d,%d:%d)\n" + , method_name, __LINE__ + , cloneProcess->GetName() + , cloneProcess->GetNid() + , cloneProcess->GetPid() + , cloneProcess->GetVerifier() ); + } + Nodes->DeleteCloneProcess( cloneProcess ); + } break; } - process = process->GetNextL(); + targetProcess = targetProcess->GetNextL(); } } http://git-wip-us.apache.org/repos/asf/trafodion/blob/731fd2d1/core/sqf/monitor/linux/reqkill.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/reqkill.cxx b/core/sqf/monitor/linux/reqkill.cxx index ee88a19..e8cad71 100644 --- a/core/sqf/monitor/linux/reqkill.cxx +++ b/core/sqf/monitor/linux/reqkill.cxx @@ -138,7 +138,8 @@ void CExtKillReq::Kill( CProcess *process ) void CExtKillReq::performRequest() { bool status = FAILURE; - CProcess *process = NULL; + CProcess *cloneProcess = NULL; + CProcess *targetProcess = NULL; CProcess *backup = NULL; const char method_name[] = "CExtKillReq::performRequest"; @@ -195,41 +196,113 @@ void CExtKillReq::performRequest() { if ( target_process_name.size() ) { // find by name (check node state, don't check process state, not backup) - process = Nodes->GetProcess( target_process_name.c_str() - , target_verifier - , true, false, false ); - if ( process && + targetProcess = Nodes->GetProcess( target_process_name.c_str() + , target_verifier + , true, false, false ); + if ( targetProcess && (msg_->u.request.u.kill.target_nid == -1 || msg_->u.request.u.kill.target_pid == -1)) { - backup = process->GetBackup (); + backup = targetProcess->GetBackup (); } } else { // find by nid (check node state, don't check process state, backup is Ok) - process = Nodes->GetProcess( 
target_nid - , target_pid - , target_verifier - , true, false, true ); + targetProcess = Nodes->GetProcess( target_nid + , target_pid + , target_verifier + , true, false, true ); backup = NULL; } + if ( targetProcess ) + { + if (trace_settings & TRACE_REQUEST) + { + trace_printf( "%s@%d - Found targetProcess %s (%d,%d:%d), clone=%d\n" + , method_name, __LINE__ + , targetProcess->GetName() + , targetProcess->GetNid() + , targetProcess->GetPid() + , targetProcess->GetVerifier() + , targetProcess->IsClone() ); + } + } + else + { + if (NameServerEnabled) + { + if ( target_process_name.size() ) + { // Name Server find by name:verifier + if (trace_settings & TRACE_REQUEST) + { + trace_printf( "%s@%d" " - Getting targetProcess from Name Server (%s:%d)" "\n" + , method_name, __LINE__ + , target_process_name.c_str() + , target_verifier ); + } + cloneProcess = Nodes->CloneProcessNs( target_process_name.c_str() + , target_verifier ); + targetProcess = cloneProcess; + } + else + { // Name Server find by nid,pid:verifier + if (trace_settings & TRACE_REQUEST) + { + trace_printf( "%s@%d" " - Getting targetProcess from Name Server (%d,%d:%d)\n" + , method_name, __LINE__ + , target_nid + , target_pid + , target_verifier ); + } + cloneProcess = Nodes->CloneProcessNs( target_nid + , target_pid + , target_verifier ); + targetProcess = cloneProcess; + } + if (targetProcess) + { + if (trace_settings & TRACE_REQUEST) + trace_printf( "%s@%d - Found targetProcess %s (%d,%d:%d), clone=%d\n" + , method_name, __LINE__ + , targetProcess->GetName() + , targetProcess->GetNid() + , targetProcess->GetPid() + , targetProcess->GetVerifier() + , targetProcess->IsClone() ); + } + } + } - if (process) + if (targetProcess) { - process->SetAbort( msg_->u.request.u.kill.persistent_abort ); + targetProcess->SetAbort( msg_->u.request.u.kill.persistent_abort ); if (backup) { // We are killing both the primary and backup processes Kill( backup ); } - Kill( process ); + Kill( targetProcess ); + + if 
(NameServerEnabled && cloneProcess) + { + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC)) + { + trace_printf( "%s@%d - Deleting clone process %s, (%d,%d:%d)\n" + , method_name, __LINE__ + , cloneProcess->GetName() + , cloneProcess->GetNid() + , cloneProcess->GetPid() + , cloneProcess->GetVerifier() ); + } + Nodes->DeleteCloneProcess( cloneProcess ); + } msg_->u.reply.type = ReplyType_Generic; - msg_->u.reply.u.generic.nid = process->GetNid(); - msg_->u.reply.u.generic.pid = process->GetPid(); - msg_->u.reply.u.generic.verifier = process->GetVerifier(); - strcpy (msg_->u.reply.u.generic.process_name, process->GetName()); + msg_->u.reply.u.generic.nid = targetProcess->GetNid(); + msg_->u.reply.u.generic.pid = targetProcess->GetPid(); + msg_->u.reply.u.generic.verifier = targetProcess->GetVerifier(); + strcpy (msg_->u.reply.u.generic.process_name, targetProcess->GetName()); msg_->u.reply.u.generic.return_code = MPI_SUCCESS; status = SUCCESS; } http://git-wip-us.apache.org/repos/asf/trafodion/blob/731fd2d1/core/sqf/monitor/linux/reqnotify.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/reqnotify.cxx b/core/sqf/monitor/linux/reqnotify.cxx index 6dc43fc..5900f01 100644 --- a/core/sqf/monitor/linux/reqnotify.cxx +++ b/core/sqf/monitor/linux/reqnotify.cxx @@ -215,14 +215,7 @@ void CExtNotifyReq::performRequest() } else { - if (!NameServerEnabled) - { - if (trace_settings & TRACE_REQUEST) - { - trace_printf("%s@%d - Can't find targetProcess\n", method_name, __LINE__); - } - } - else + if (NameServerEnabled) { if ( target_process_name.size() ) { // Name Server find by name:verifier @@ -233,8 +226,8 @@ void CExtNotifyReq::performRequest() , target_process_name.c_str() , target_verifier ); } - targetProcess = Nodes->GetProcessNs( target_process_name.c_str() - , target_verifier ); + targetProcess = Nodes->CloneProcessNs( target_process_name.c_str() + , target_verifier 
); } else { // Name Server find by nid,pid:verifier @@ -246,9 +239,9 @@ void CExtNotifyReq::performRequest() , target_pid , target_verifier ); } - targetProcess = Nodes->GetProcessNs( target_nid - , target_pid - , target_verifier ); + targetProcess = Nodes->CloneProcessNs( target_nid + , target_pid + , target_verifier ); } if (targetProcess) { @@ -261,14 +254,6 @@ void CExtNotifyReq::performRequest() , targetProcess->GetVerifier() , targetProcess->IsClone() ); } - else - { - trace_printf( "%s@%d" " - Can't find targetProcess (%d,%d:%d)\n" - , method_name, __LINE__ - , target_nid - , target_pid - , target_verifier ); - } } } http://git-wip-us.apache.org/repos/asf/trafodion/blob/731fd2d1/core/sqf/monitor/linux/reqopen.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/reqopen.cxx b/core/sqf/monitor/linux/reqopen.cxx index 379049f..f131a08 100644 --- a/core/sqf/monitor/linux/reqopen.cxx +++ b/core/sqf/monitor/linux/reqopen.cxx @@ -291,8 +291,8 @@ bool CExtOpenReq::prepare() , method_name, __LINE__ , target_process_name.c_str() , target_verifier ); - openedProcess = Nodes->GetProcessNs( target_process_name.c_str() - , target_verifier ); + openedProcess = Nodes->CloneProcessNs( target_process_name.c_str() + , target_verifier ); } else { // Name Server find by nid,pid:verifier @@ -302,9 +302,9 @@ bool CExtOpenReq::prepare() , target_nid , target_pid , target_verifier ); - openedProcess = Nodes->GetProcessNs( target_nid - , target_pid - , target_verifier ); + openedProcess = Nodes->CloneProcessNs( target_nid + , target_pid + , target_verifier ); } } http://git-wip-us.apache.org/repos/asf/trafodion/blob/731fd2d1/core/sqf/monitor/linux/reqqueue.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/reqqueue.cxx b/core/sqf/monitor/linux/reqqueue.cxx index b10716c..508a325 100644 --- a/core/sqf/monitor/linux/reqqueue.cxx +++ b/core/sqf/monitor/linux/reqqueue.cxx 
@@ -1584,9 +1584,9 @@ void CIntNewProcReq::performRequest() , parentNid_ , parentPid_ , parentVerifier_ ); - parentProcess = Nodes->GetProcessNs( parentNid_ - , parentPid_ - , parentVerifier_ ); + parentProcess = Nodes->CloneProcessNs( parentNid_ + , parentPid_ + , parentVerifier_ ); } } } @@ -1964,9 +1964,9 @@ void CIntNotifyReq::performRequest() , pid_ , verifier_ ); } - sourceProcess = Nodes->GetProcessNs( nid_ - , pid_ - , verifier_ ); + sourceProcess = Nodes->CloneProcessNs( nid_ + , pid_ + , verifier_ ); if (sourceProcess) { if (trace_settings & TRACE_REQUEST) @@ -2037,9 +2037,9 @@ void CIntNotifyReq::performRequest() , targetPid_ , targetVerifier_ ); } - targetProcess = Nodes->GetProcessNs( targetNid_ - , targetPid_ - , targetVerifier_ ); + targetProcess = Nodes->CloneProcessNs( targetNid_ + , targetPid_ + , targetVerifier_ ); if (targetProcess) { if (trace_settings & TRACE_REQUEST)
