Multiple fixes to problems found by Seabed tests.
Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/5d97082d Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/5d97082d Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/5d97082d Branch: refs/heads/master Commit: 5d97082d6ff9c299f473c6421d9131679c53c04f Parents: 010e563 Author: Zalo Correa <[email protected]> Authored: Tue Apr 3 18:42:09 2018 -0700 Committer: Zalo Correa <[email protected]> Committed: Tue Apr 3 18:42:09 2018 -0700 ---------------------------------------------------------------------- core/sqf/monitor/linux/cluster.cxx | 11 +++++ core/sqf/monitor/linux/nameserver.cxx | 8 +++ core/sqf/monitor/linux/pnode.cxx | 12 +++++ core/sqf/monitor/linux/ptpclient.cxx | 26 +++++++++- core/sqf/monitor/linux/ptpclient.h | 3 ++ core/sqf/monitor/linux/reqprocinfo.cxx | 75 +++++++++++++++++++++++++++++ core/sqf/sql/scripts/monitor.env | 1 + 7 files changed, 134 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/cluster.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx index 070230e..aa3062c 100644 --- a/core/sqf/monitor/linux/cluster.cxx +++ b/core/sqf/monitor/linux/cluster.cxx @@ -9153,6 +9153,17 @@ int CCluster::MkCltSock( const char *portName ) host[len] = '\0'; port = atoi(&colon[1]); + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY)) + { + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY)) + { + trace_printf( "%s@%d - Connecting to %s:%d\n" + , method_name, __LINE__ + , host + , port ); + } + } + size = sizeof(sockinfo); if ( !retries ) http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/nameserver.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nameserver.cxx b/core/sqf/monitor/linux/nameserver.cxx index adddf37..1815409 100644 --- a/core/sqf/monitor/linux/nameserver.cxx +++ b/core/sqf/monitor/linux/nameserver.cxx @@ -328,6 +328,14 @@ int CNameServer::SockCreate( void ) strcpy( host, mon2nsHost_ ); port = atoi( mon2nsPort_ ); + if (trace_settings & (TRACE_INIT | TRACE_RECOVERY)) + { + trace_printf( "%s@%d - Connecting to %s:%d\n" + , method_name, __LINE__ + , host + , port ); + } + size = sizeof(sockinfo ); if ( !retries ) http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/pnode.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx index 6affd4f..5c51ada 100644 --- a/core/sqf/monitor/linux/pnode.cxx +++ b/core/sqf/monitor/linux/pnode.cxx @@ -2825,6 +2825,18 @@ CProcess *CNodeContainer::GetProcess( const char *name const char method_name[] = "CNodeContainer::GetProcess(name,verifier)"; TRACE_ENTRY; + if (trace_settings & (TRACE_REQUEST_DETAIL | TRACE_PROCESS_DETAIL)) + { + trace_printf( "%s@%d Getting %s:%d, " + "checknode=%d, checkprocess=%d, backupOk=%d\n" + , method_name, __LINE__ + , name + , verifier + , checknode + , checkprocess + , backupOk ); + } + while ( node ) { if ( checknode ) http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/ptpclient.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/ptpclient.cxx b/core/sqf/monitor/linux/ptpclient.cxx index 4f3f292..98ca3a4 100644 --- a/core/sqf/monitor/linux/ptpclient.cxx +++ b/core/sqf/monitor/linux/ptpclient.cxx @@ -62,7 +62,15 @@ CPtpClient::CPtpClient (void) { const char method_name[] = "CPtpClient::CPtpClient"; TRACE_ENTRY; + + ptpHost_[0] = '\0'; + ptpPortBase_[0] = '\0'; + if ( !IsRealCluster ) + { + SetLocalHost(); + } + char * p = getenv( "MON2MON_COMM_PORT" ); if ( p ) { @@ -702,6 +710,11 @@ int CPtpClient::ReceiveSock(char *buf, int size, int sockFd) return error; } +void CPtpClient::SetLocalHost( void ) +{ + gethostname( ptpHost_, MAX_PROCESSOR_NAME ); +} + int CPtpClient::SendSock(char *buf, int size, int sockFd) { const char method_name[] = "CPtpClient::SendSock"; @@ -773,23 +786,32 @@ int CPtpClient::SendToMon(const char *reqType, internal_msg_def *msg, int size, TRACE_ENTRY; char monPortString[MAX_PROCESSOR_NAME]; + char ptpHost[MAX_PROCESSOR_NAME]; char ptpPort[MAX_PROCESSOR_NAME]; int tempPort = basePort_; + ptpHost[0] = '\0'; + // For virtual env if (!IsRealCluster) { tempPort += receiveNode; + strcat( ptpHost, ptpHost_ ); + } + else + { + strcat( ptpHost, hostName ); } if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) { trace_printf( "%s@%d - reqType=%s, hostName=%s, receiveNode=%d, " - "tempPort=%d, basePort_=%d\n" + "ptpHost=%s, tempPort=%d, basePort_=%d\n" , method_name, __LINE__ , reqType , hostName , receiveNode + , ptpHost , tempPort , basePort_ ); } @@ -797,7 +819,7 @@ int CPtpClient::SendToMon(const char *reqType, internal_msg_def *msg, int size, memset( &ptpPort, 0, MAX_PROCESSOR_NAME ); memset( &ptpPortBase_, 0, MAX_PROCESSOR_NAME+100 ); - strcat( ptpPortBase_, hostName ); + strcat( ptpPortBase_, ptpHost ); strcat( ptpPortBase_, ":" ); sprintf( monPortString,"%d", tempPort ); strcat( ptpPort, ptpPortBase_ ); http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/ptpclient.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/ptpclient.h b/core/sqf/monitor/linux/ptpclient.h index 554bc71..d46ea5e 100644 --- a/core/sqf/monitor/linux/ptpclient.h +++ b/core/sqf/monitor/linux/ptpclient.h @@ -66,7 +66,9 @@ public: , const char *targetNodeName ); private: + int basePort_; + char ptpHost_[MAX_PROCESSOR_NAME]; char ptpPortBase_[MAX_PROCESSOR_NAME+100]; int ptpSock_; int seqNum_; @@ -74,6 +76,7 @@ private: int ReceiveSock(char *buf, int size, int sockFd); int SendSock(char *buf, int size, int sockFd); int SendToMon(const char *reqType, internal_msg_def *msg, int size, int receiveNode, const char *hostName); + void SetLocalHost( void ); }; #endif http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/reqprocinfo.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/reqprocinfo.cxx b/core/sqf/monitor/linux/reqprocinfo.cxx index 0ad8d00..566a92b 100644 --- a/core/sqf/monitor/linux/reqprocinfo.cxx +++ b/core/sqf/monitor/linux/reqprocinfo.cxx @@ -44,8 +44,11 @@ extern CNameServer *NameServer; // Copy information for a specific process into the reply message buffer. void CExtProcInfoBase::ProcessInfo_CopyData(CProcess *process, ProcessInfoState &procState) { + const char method_name[] = "CNameServer::SendReceive"; CProcess *parent; + TRACE_ENTRY; + procState.nid = process->GetNid(); procState.pid = process->GetPid(); procState.verifier = process->GetVerifier(); @@ -82,6 +85,58 @@ void CExtProcInfoBase::ProcessInfo_CopyData(CProcess *process, ProcessInfoState procState.parent_verifier = -1; procState.parent_name[0] = '\0'; } + + if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) + { + char desc[2048]; + char* descp = desc; + sprintf( desc, + "ProcessInfo reply:\n" + " procState.process_name=%s\n" + " procState.nid=%d\n" + " procState.pid=%d\n" + " procState.verifier=%d\n" + " procState.type=%d\n" + " procState.os_pid=%d\n" + " procState.parent_name=%s\n" + " procState.parent_nid=%d\n" + " procState.parent_pid=%d\n" + " procState.parent_verifier=%d\n" + " procState.priority=%d\n" + " procState.state=%d\n" + " procState.pending_delete=%d\n" + " procState.event_messages=%d\n" + " procState.system_messages=%d\n" + " procState.paired=%d\n" + " procState.waiting_startup=%d\n" + " procState.opened=%d\n" + " procState.backup=%d\n" + " procState.program=%s\n" + , procState.process_name + , procState.nid + , procState.pid + , procState.verifier + , procState.type + , procState.os_pid + , procState.parent_name + , procState.parent_nid + , procState.parent_pid + , procState.parent_verifier + , procState.priority + , procState.state + , procState.pending_delete + , procState.event_messages + , procState.system_messages + , procState.paired + , procState.waiting_startup + , procState.opened + , procState.backup + , procState.program ); + trace_printf( "%s@%d - %s\n" + , method_name, __LINE__, descp ); + } + + TRACE_EXIT; } @@ -337,6 +392,7 @@ void CExtProcInfoReq::performRequest() if ( processName_.size() ) { // find by name #ifdef NAMESERVER_PROCESS + // (check node state, check process state, not backup) requester = Nodes->GetProcess( processName_.c_str() , verifier_ ); #else @@ -347,9 +403,13 @@ void CExtProcInfoReq::performRequest() else { // find by pid #ifdef NAMESERVER_PROCESS + // (don't check node state, don't check process state, backup is Ok) requester = Nodes->GetProcess( nid_ , pid_ , verifier_ , false, false, true ); +// CLNode *lnode = Nodes->GetLNode( nid_ ); +// CNode *node = lnode->GetNode(); +// requester = node->GetProcess( pid_, verifier_ ); #else requester = MyNode->GetProcess( pid_ , verifier_ ); @@ -394,6 +454,11 @@ void CExtProcInfoReq::performRequest() if ( requester && strcmp( requester->GetName() , msg_->u.request.u.process_info.target_process_name) == 0 ) { + if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) + { + trace_printf("%s@%d request #%ld: ProcessInfo, for " + "requester\n", method_name, __LINE__, id_); + } ProcessInfo_CopyData(requester, msg_->u.reply.u.process_info.process[0]); count = 1; @@ -419,6 +484,11 @@ void CExtProcInfoReq::performRequest() { if ( target_verifier == -1 ) { // the name may represent process pair, return primary only + if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) + { + trace_printf("%s@%d request #%ld: ProcessInfo, for " + "process pair\n", method_name, __LINE__, id_); + } ProcessInfo_CopyPairData( process , msg_->u.reply.u.process_info.process[0] ); count = 1; @@ -426,6 +496,11 @@ void CExtProcInfoReq::performRequest() } else { + if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS)) + { + trace_printf("%s@%d request #%ld: ProcessInfo, for " + "process\n", method_name, __LINE__, id_); + } ProcessInfo_CopyData( process , msg_->u.reply.u.process_info.process[0] ); count = 1; http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/sql/scripts/monitor.env ---------------------------------------------------------------------- diff --git a/core/sqf/sql/scripts/monitor.env b/core/sqf/sql/scripts/monitor.env index a9172b8..2bfa4a7 100644 --- a/core/sqf/sql/scripts/monitor.env +++ b/core/sqf/sql/scripts/monitor.env @@ -40,6 +40,7 @@ #MON_TRACE_SYNC_DETAIL=1 #MON_TRACE_MLIO_DETAIL=1 +#MON_TRACE_MEAS #MON_TRACE_TMSYNC=1 #MON_TRACE_STATS=1 #MON_TRACE_ENTRY_EXIT=1
