Multiple fixes to problems found by Seabed tests.

Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/5d97082d
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/5d97082d
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/5d97082d

Branch: refs/heads/master
Commit: 5d97082d6ff9c299f473c6421d9131679c53c04f
Parents: 010e563
Author: Zalo Correa <[email protected]>
Authored: Tue Apr 3 18:42:09 2018 -0700
Committer: Zalo Correa <[email protected]>
Committed: Tue Apr 3 18:42:09 2018 -0700

----------------------------------------------------------------------
 core/sqf/monitor/linux/cluster.cxx     | 11 +++++
 core/sqf/monitor/linux/nameserver.cxx  |  8 +++
 core/sqf/monitor/linux/pnode.cxx       | 12 +++++
 core/sqf/monitor/linux/ptpclient.cxx   | 26 +++++++++-
 core/sqf/monitor/linux/ptpclient.h     |  3 ++
 core/sqf/monitor/linux/reqprocinfo.cxx | 75 +++++++++++++++++++++++++++++
 core/sqf/sql/scripts/monitor.env       |  1 +
 7 files changed, 134 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx
index 070230e..aa3062c 100644
--- a/core/sqf/monitor/linux/cluster.cxx
+++ b/core/sqf/monitor/linux/cluster.cxx
@@ -9153,6 +9153,17 @@ int CCluster::MkCltSock( const char *portName )
     host[len] = '\0';
     port = atoi(&colon[1]);
 
+    if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+    {
+        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+        {
+            trace_printf( "%s@%d - Connecting to %s:%d\n"
+                        , method_name, __LINE__
+                        , host
+                        , port );
+        }
+    }
+
     size = sizeof(sockinfo);
 
     if ( !retries )

http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/nameserver.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/nameserver.cxx b/core/sqf/monitor/linux/nameserver.cxx
index adddf37..1815409 100644
--- a/core/sqf/monitor/linux/nameserver.cxx
+++ b/core/sqf/monitor/linux/nameserver.cxx
@@ -328,6 +328,14 @@ int CNameServer::SockCreate( void )
     strcpy( host, mon2nsHost_ );
     port = atoi( mon2nsPort_ );
     
+    if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+    {
+        trace_printf( "%s@%d - Connecting to %s:%d\n"
+                    , method_name, __LINE__
+                    , host
+                    , port );
+    }
+
     size = sizeof(sockinfo );
 
     if ( !retries )

http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/pnode.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx
index 6affd4f..5c51ada 100644
--- a/core/sqf/monitor/linux/pnode.cxx
+++ b/core/sqf/monitor/linux/pnode.cxx
@@ -2825,6 +2825,18 @@ CProcess *CNodeContainer::GetProcess( const char *name
     const char method_name[] = "CNodeContainer::GetProcess(name,verifier)";
     TRACE_ENTRY;
 
+    if (trace_settings & (TRACE_REQUEST_DETAIL | TRACE_PROCESS_DETAIL))
+    {
+        trace_printf( "%s@%d Getting %s:%d, "
+                      "checknode=%d, checkprocess=%d, backupOk=%d\n"
+                    , method_name, __LINE__
+                    , name
+                    , verifier
+                    , checknode
+                    , checkprocess
+                    , backupOk );
+    }
+
     while ( node )
     {
         if ( checknode )

http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/ptpclient.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/ptpclient.cxx b/core/sqf/monitor/linux/ptpclient.cxx
index 4f3f292..98ca3a4 100644
--- a/core/sqf/monitor/linux/ptpclient.cxx
+++ b/core/sqf/monitor/linux/ptpclient.cxx
@@ -62,7 +62,15 @@ CPtpClient::CPtpClient (void)
 {
     const char method_name[] = "CPtpClient::CPtpClient";
     TRACE_ENTRY;
+
+    ptpHost_[0] = '\0';
+    ptpPortBase_[0] = '\0';
+    if ( !IsRealCluster )
+    {
+        SetLocalHost();
+    }
     
+
     char * p = getenv( "MON2MON_COMM_PORT" );
     if ( p ) 
     {
@@ -702,6 +710,11 @@ int CPtpClient::ReceiveSock(char *buf, int size, int sockFd)
     return error;
 }
 
+void CPtpClient::SetLocalHost( void ) // Stores the local host name in ptpHost_ via gethostname().
+{
+    gethostname( ptpHost_, MAX_PROCESSOR_NAME ); // NOTE(review): return value is ignored; per POSIX a truncated name may be left without a terminating NUL -- confirm whether this needs handling
+}
+
 int CPtpClient::SendSock(char *buf, int size, int sockFd)
 {
     const char method_name[] = "CPtpClient::SendSock";
@@ -773,23 +786,32 @@ int CPtpClient::SendToMon(const char *reqType, internal_msg_def *msg, int size,
     TRACE_ENTRY;
     
     char monPortString[MAX_PROCESSOR_NAME];
+    char ptpHost[MAX_PROCESSOR_NAME];
     char ptpPort[MAX_PROCESSOR_NAME];
     int tempPort = basePort_;
     
+    ptpHost[0] = '\0';
+
     // For virtual env
     if (!IsRealCluster)
     {
         tempPort += receiveNode;
+        strcat( ptpHost, ptpHost_ );
+    }
+    else
+    {
+        strcat( ptpHost, hostName );
     }
     
     if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
     {
         trace_printf( "%s@%d - reqType=%s, hostName=%s, receiveNode=%d, "
-                      "tempPort=%d, basePort_=%d\n"
+                      "ptpHost=%s, tempPort=%d, basePort_=%d\n"
                     , method_name, __LINE__
                     , reqType
                     , hostName
                     , receiveNode
+                    , ptpHost
                     , tempPort 
                     , basePort_ );
     }
@@ -797,7 +819,7 @@ int CPtpClient::SendToMon(const char *reqType, internal_msg_def *msg, int size,
     memset( &ptpPort, 0, MAX_PROCESSOR_NAME );
     memset( &ptpPortBase_, 0, MAX_PROCESSOR_NAME+100 );
 
-    strcat( ptpPortBase_, hostName );
+    strcat( ptpPortBase_, ptpHost );
     strcat( ptpPortBase_, ":" );
     sprintf( monPortString,"%d", tempPort );
     strcat( ptpPort, ptpPortBase_ );

http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/ptpclient.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/ptpclient.h b/core/sqf/monitor/linux/ptpclient.h
index 554bc71..d46ea5e 100644
--- a/core/sqf/monitor/linux/ptpclient.h
+++ b/core/sqf/monitor/linux/ptpclient.h
@@ -66,7 +66,9 @@ public:
                       , const char *targetNodeName );
 
 private:
+
     int  basePort_;
+    char ptpHost_[MAX_PROCESSOR_NAME];
     char ptpPortBase_[MAX_PROCESSOR_NAME+100];
     int  ptpSock_;
     int  seqNum_;
@@ -74,6 +76,7 @@ private:
     int  ReceiveSock(char *buf, int size, int sockFd);
     int  SendSock(char *buf, int size, int sockFd);
     int  SendToMon(const char *reqType, internal_msg_def *msg, int size, int 
receiveNode, const char *hostName);
+    void SetLocalHost( void );
 };
 
 #endif

http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/monitor/linux/reqprocinfo.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/reqprocinfo.cxx b/core/sqf/monitor/linux/reqprocinfo.cxx
index 0ad8d00..566a92b 100644
--- a/core/sqf/monitor/linux/reqprocinfo.cxx
+++ b/core/sqf/monitor/linux/reqprocinfo.cxx
@@ -44,8 +44,11 @@ extern CNameServer *NameServer;
 // Copy information for a specific process into the reply message buffer.
 void CExtProcInfoBase::ProcessInfo_CopyData(CProcess *process, 
ProcessInfoState &procState)
 {
+    const char method_name[] = "CNameServer::SendReceive";
     CProcess *parent;
 
+    TRACE_ENTRY;
+
     procState.nid = process->GetNid();
     procState.pid = process->GetPid();
     procState.verifier = process->GetVerifier();
@@ -82,6 +85,58 @@ void CExtProcInfoBase::ProcessInfo_CopyData(CProcess *process, ProcessInfoState
         procState.parent_verifier = -1;
         procState.parent_name[0] = '\0';
     }
+
+    if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
+    {
+        char desc[2048];
+        char* descp = desc;
+        sprintf( desc, 
+                 "ProcessInfo reply:\n"
+                 "        procState.process_name=%s\n"
+                 "        procState.nid=%d\n"
+                 "        procState.pid=%d\n"
+                 "        procState.verifier=%d\n"
+                 "        procState.type=%d\n"
+                 "        procState.os_pid=%d\n"
+                 "        procState.parent_name=%s\n"
+                 "        procState.parent_nid=%d\n"
+                 "        procState.parent_pid=%d\n"
+                 "        procState.parent_verifier=%d\n"
+                 "        procState.priority=%d\n"
+                 "        procState.state=%d\n"
+                 "        procState.pending_delete=%d\n"
+                 "        procState.event_messages=%d\n"
+                 "        procState.system_messages=%d\n"
+                 "        procState.paired=%d\n"
+                 "        procState.waiting_startup=%d\n"
+                 "        procState.opened=%d\n"
+                 "        procState.backup=%d\n"
+                 "        procState.program=%s\n"
+                 , procState.process_name
+                 , procState.nid
+                 , procState.pid
+                 , procState.verifier
+                 , procState.type
+                 , procState.os_pid
+                 , procState.parent_name
+                 , procState.parent_nid
+                 , procState.parent_pid
+                 , procState.parent_verifier
+                 , procState.priority
+                 , procState.state
+                 , procState.pending_delete
+                 , procState.event_messages
+                 , procState.system_messages
+                 , procState.paired
+                 , procState.waiting_startup
+                 , procState.opened
+                 , procState.backup
+                 , procState.program );
+        trace_printf( "%s@%d - %s\n"
+                    , method_name, __LINE__, descp );
+    }
+
+    TRACE_EXIT;
 }
 
 
@@ -337,6 +392,7 @@ void CExtProcInfoReq::performRequest()
         if ( processName_.size() )
         { // find by name
 #ifdef NAMESERVER_PROCESS
+            //  (check node state, check process state, not backup)
             requester = Nodes->GetProcess( processName_.c_str()
                                          , verifier_ );
 #else
@@ -347,9 +403,13 @@ void CExtProcInfoReq::performRequest()
         else
         { // find by pid
 #ifdef NAMESERVER_PROCESS
+            //  (don't check node state, don't check process state, backup is 
Ok)
             requester =
                Nodes->GetProcess( nid_ , pid_ , verifier_
                                 , false, false, true );
+//            CLNode *lnode = Nodes->GetLNode( nid_ );
+//            CNode *node = lnode->GetNode();
+//            requester = node->GetProcess( pid_, verifier_ );
 #else
             requester = MyNode->GetProcess( pid_
                                           , verifier_ );
@@ -394,6 +454,11 @@ void CExtProcInfoReq::performRequest()
                 if ( requester && strcmp( requester->GetName()
                            , 
msg_->u.request.u.process_info.target_process_name) == 0 )
                 {
+                    if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
+                    {
+                        trace_printf("%s@%d request #%ld: ProcessInfo, for "
+                                     "requester\n", method_name, __LINE__, 
id_);
+                    }
                     ProcessInfo_CopyData(requester,
                                          
msg_->u.reply.u.process_info.process[0]);
                     count = 1;
@@ -419,6 +484,11 @@ void CExtProcInfoReq::performRequest()
                     {
                         if ( target_verifier == -1 )
                         { // the name may represent process pair, return 
primary only
+                            if (trace_settings & (TRACE_REQUEST | 
TRACE_PROCESS))
+                            {
+                                trace_printf("%s@%d request #%ld: ProcessInfo, 
for "
+                                             "process pair\n", method_name, 
__LINE__, id_);
+                            }
                             ProcessInfo_CopyPairData( process
                                                     , 
msg_->u.reply.u.process_info.process[0] );
                             count = 1;
@@ -426,6 +496,11 @@ void CExtProcInfoReq::performRequest()
                         }
                         else
                         { 
+                            if (trace_settings & (TRACE_REQUEST | 
TRACE_PROCESS))
+                            {
+                                trace_printf("%s@%d request #%ld: ProcessInfo, 
for "
+                                             "process\n", method_name, 
__LINE__, id_);
+                            }
                             ProcessInfo_CopyData( process
                                                 , 
msg_->u.reply.u.process_info.process[0] );
                             count = 1;

http://git-wip-us.apache.org/repos/asf/trafodion/blob/5d97082d/core/sqf/sql/scripts/monitor.env
----------------------------------------------------------------------
diff --git a/core/sqf/sql/scripts/monitor.env b/core/sqf/sql/scripts/monitor.env
index a9172b8..2bfa4a7 100644
--- a/core/sqf/sql/scripts/monitor.env
+++ b/core/sqf/sql/scripts/monitor.env
@@ -40,6 +40,7 @@
 #MON_TRACE_SYNC_DETAIL=1
 #MON_TRACE_MLIO_DETAIL=1
 
+#MON_TRACE_MEAS=1
 #MON_TRACE_TMSYNC=1
 #MON_TRACE_STATS=1
 #MON_TRACE_ENTRY_EXIT=1

Reply via email to