Repository: trafodion
Updated Branches:
  refs/heads/master 8b6a6bbeb -> 0c049d784


http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/montest_run.virtual
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/montest_run.virtual b/core/sqf/monitor/linux/montest_run.virtual
index 424d38d..e6a7463 100755
--- a/core/sqf/monitor/linux/montest_run.virtual
+++ b/core/sqf/monitor/linux/montest_run.virtual
@@ -23,24 +23,26 @@
 
 #!/bin/sh
 
+# Resolve the host architecture once; it selects the Linux-<arch>/dbg directory below.
+ARCH=`arch`
 # Cleanup 
-cd $TRAF_HOME/monitor/linux/Linux-x86_64/dbg
+# Abort if the directory is missing so the rm below never runs in the wrong place.
+cd ${TRAF_HOME}/monitor/linux/Linux-${ARCH}/dbg || exit 1
 echo $PWD
 rm -f core* *.log *.lst test*sub*
 rm -f $MPI_TMPDIR/monitor.port.*
 
 # Setup monitor test files
 cd $TRAF_HOME/monitor/linux
 echo $PWD
 echo Copying monitor test files to execution directory
-echo cp -p ./test*sub* ./Linux-x86_64/dbg
-cp -p ./test*sub* ./Linux-x86_64/dbg
+echo cp -p ./test*sub* ./Linux-${ARCH}/dbg
+cp -p ./test*sub* ./Linux-${ARCH}/dbg
 
 # Establish SQ virtual cluster parameters
 export SQ_VIRTUAL_NODES=6
 export SQ_VIRTUAL_NID=0
 
-cd $TRAF_HOME/monitor/linux/Linux-x86_64/dbg
+cd ${TRAF_HOME}/monitor/linux/Linux-${ARCH}/dbg
 echo $PWD
 
 shell <<eof 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/pnode.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx
index 57ce0a6..00f1271 100644
--- a/core/sqf/monitor/linux/pnode.cxx
+++ b/core/sqf/monitor/linux/pnode.cxx
@@ -1133,10 +1133,10 @@ void CNode::StartWatchdogProcess( void )
     }
 
     //Displays the startup and keep alive timer values in use for a given run.
-    if (trace_settings & TRACE_INIT)
+    if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
        trace_printf("%s@%d" " - KeepAlive Timer in seconds =%d\n", 
method_name, __LINE__, (wdtKeepAliveTimerValue_));
 
-    if (trace_settings & TRACE_INIT)
+    if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
        trace_printf("%s@%d" " - Creating Watchdog Process\n", method_name, 
__LINE__);
 
     strcpy(path,getenv("PATH"));
@@ -1959,7 +1959,7 @@ int CNodeContainer::PackNodeMappings( intBuffPtr_t &buffer )
 
             ++count;
 
-            if (trace_settings & ( TRACE_INIT || TRACE_RECOVERY || 
TRACE_REQUEST_DETAIL) )
+            if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
                 trace_printf("%s@%d - Packing node mapping, pnidConfig=%d, 
pnid=%d \n",
                             method_name, __LINE__, pnidConfig, pnid);
         }
@@ -1982,7 +1982,7 @@ void CNodeContainer::UnpackNodeMappings( intBuffPtr_t &buffer, int nodeMapCount
         pnidConfig = *buffer++;
         pnid = *buffer++;
 
-        if (trace_settings & ( TRACE_INIT || TRACE_RECOVERY || 
TRACE_REQUEST_DETAIL) )
+        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
             trace_printf("%s@%d - Unpacking node mapping, pnidConfig=%d, 
pnid=%d \n",
                         method_name, __LINE__, pnidConfig, pnid);
 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/pnodeconfig.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnodeconfig.cxx b/core/sqf/monitor/linux/pnodeconfig.cxx
index 4d5ee99..03f52c9 100644
--- a/core/sqf/monitor/linux/pnodeconfig.cxx
+++ b/core/sqf/monitor/linux/pnodeconfig.cxx
@@ -569,9 +569,10 @@ int CPNodeConfigContainer::hostnamecmp( const char *p_str1, const char *p_str2 )
     if ( !p_str1 ) return 1;
     if ( !p_str2 ) return 1;
 
+    // Compare the string passed in
     int lv_ret = strcmp( p_str1, p_str2 );
     if ( lv_ret == 0 )
-    {
+    { // Got a match!
         return lv_ret;
     }
     if ( sb_strict_hostname_check )
@@ -586,23 +587,45 @@ int CPNodeConfigContainer::hostnamecmp( const char *p_str1, const char *p_str2 )
 
     char *lp_str1_dot = strchr( (char *) p_str1, '.' );
     if ( lp_str1_dot )
-    {
+    { // Found '.', copy up to one char before '.'
         memcpy( lv_str1_to_cmp, p_str1, lp_str1_dot - p_str1 );
     }
     else
-    {
+    { // Copy entire string
         strcpy( lv_str1_to_cmp, p_str1 );
     }
 
     char *lp_str2_dot = strchr( (char *) p_str2, '.' );
     if ( lp_str2_dot )
-    {
+    { // Found '.', copy up to one char before '.'
         memcpy( lv_str2_to_cmp, p_str2, lp_str2_dot - p_str2 );
     }
     else
-    {
+    { // Copy entire string
         strcpy( lv_str2_to_cmp, p_str2 );
     }
 
+    // Ignore case
+    NormalizeCase( lv_str1_to_cmp );
+    NormalizeCase( lv_str2_to_cmp );
     return strcmp( lv_str1_to_cmp, lv_str2_to_cmp );
 }
+
+// Lower-cases 'token' in place and truncates it at the first newline, if any.
+// A NULL 'token' is tolerated and returned unchanged.
+// Returns 'token'.
+char *CPNodeConfigContainer::NormalizeCase( char *token )
+{
+    const char method_name[] = "CPNodeConfigContainer::NormalizeCase";
+    TRACE_ENTRY;
+
+    for ( char *ptr = token; ptr && *ptr; ptr++ )
+    {
+        if ( *ptr == '\n' )
+        { // Drop the line terminator; the string ends here
+            *ptr = '\0';
+            break;
+        }
+        // tolower() requires a value representable as unsigned char (or EOF);
+        // passing a negative plain char is undefined behavior.
+        *ptr = (char) tolower( (unsigned char) *ptr );
+    }
+
+    TRACE_EXIT;
+    return token;
+}
+

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/pnodeconfig.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnodeconfig.h b/core/sqf/monitor/linux/pnodeconfig.h
index 1916797..5deccc8 100644
--- a/core/sqf/monitor/linux/pnodeconfig.h
+++ b/core/sqf/monitor/linux/pnodeconfig.h
@@ -74,6 +74,8 @@ protected:
     int             nextPNid_;    // next physical node id available
 
 private:
+    static char  *NormalizeCase( char *token );
+
     int             pnodesConfigMax_; // maximum number of physical nodes
     PNodesConfigList_t  spareNodesConfigList_; // configured spare nodes list
     CPNodeConfig  *head_; // head of physical nodes linked list

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/process.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/process.cxx b/core/sqf/monitor/linux/process.cxx
index 8a35c4d..6a8e08b 100644
--- a/core/sqf/monitor/linux/process.cxx
+++ b/core/sqf/monitor/linux/process.cxx
@@ -327,13 +327,19 @@ CProcess::~CProcess (void)
     delete [] userArgv_;
 
     if (fd_stdin_ != -1 && !Clone)
-        Redirector.tryShutdownPipeFd(Pid, fd_stdin_);
+    {
+        Redirector.tryShutdownPipeFd(Pid, fd_stdin_, false);
+    }
 
     if (fd_stdout_ != -1)
-        Redirector.tryShutdownPipeFd(Pid, fd_stdout_);
+    {
+        Redirector.tryShutdownPipeFd(Pid, fd_stdout_, true);
+    }
 
     if (fd_stderr_ != -1)
-        Redirector.tryShutdownPipeFd(Pid, fd_stderr_);
+    {
+        Redirector.tryShutdownPipeFd(Pid, fd_stderr_, false);
+    }
 
     // Remove the fifos associated with this process (if any)
     if (fifo_stdin_.size() != 0)
@@ -911,9 +917,10 @@ bool CProcess::PickStdfile(PickStdFile_t whichStdfile,
         {
             ancestor = node->GetProcessL(nextPid);
             if ( ancestor  &&  
-                 (ancestor->CreationTime.tv_sec  < earlyCreationTime.tv_sec ||
-                 (ancestor->CreationTime.tv_sec == earlyCreationTime.tv_sec  &&
-                  ancestor->CreationTime.tv_nsec < earlyCreationTime.tv_nsec)) 
)
+                 (( ! MyNode->IsMyNode(ancestor->GetNid())) ||
+                  (ancestor->CreationTime.tv_sec  < earlyCreationTime.tv_sec ||
+                   (ancestor->CreationTime.tv_sec == earlyCreationTime.tv_sec  
&&
+                    ancestor->CreationTime.tv_nsec < 
earlyCreationTime.tv_nsec))) )
             {
                 earlyCreationTime.tv_sec  = ancestor->CreationTime.tv_sec;
                 earlyCreationTime.tv_nsec = ancestor->CreationTime.tv_nsec;
@@ -3853,71 +3860,91 @@ void CProcessContainer::AttachProcessCheck ( struct message_def *msg )
             if ( ! MyNode->IsSpareNode() )
             {
                 int nid = MyNode->AssignNid();
-                strId_t progStrId = MyNode->GetStringId( 
msg->u.request.u.startup.program );
-                strId_t nullStrId = { -1, -1 };
-                process =
-                    new CProcess (NULL, nid, msg->u.request.u.startup.os_pid, 
ProcessType_Generic, 0, 0, false, true, (char *) "", 
-                                  nullStrId, nullStrId, progStrId, (char *) 
"", (char *) "");
-                if (process == NULL)
+                if ( (nid == -1) && (MyNode->GetState() != State_Up) )
                 {
-                    //TODO: Log event
-                    abort();
-                }
-                if ( process )
-                {
-                    char user_argv[MAX_ARGS][MAX_ARG_SIZE];
-                    process->userArgs ( 0, user_argv );
-                }
-                if ( msg->u.request.u.startup.process_name[0] == '\0')
-                {   // Create a name for the process and place it in the
-                    // Name member of the process object);
-                    char pname[MAX_KEY_NAME];
-                    MyNode->BuildOurName(nid, process->GetPid(), pname );
-                    process->SetName( pname );
+                    snprintf( la_buf, sizeof(la_buf),
+                            "[%s], Can't attach the pid %d (program: %s) - the 
monitor is not up yet (curr state: %d).\n",
+                            method_name,
+                            msg->u.request.u.startup.os_pid,
+                            msg->u.request.u.startup.program,
+                            MyNode->GetState() );
+                    mon_log_write( MON_PROCESSCONT_ATTACHPCHECK_4, SQ_LOG_ERR, 
la_buf );
+
+                    msg->u.reply.type = ReplyType_Generic;
+                    msg->u.reply.u.generic.nid = -1;
+                    msg->u.reply.u.generic.pid = -1;
+                    msg->u.reply.u.generic.verifier = -1;
+                    msg->u.reply.u.generic.process_name[0] = '\0';
+                    msg->u.reply.u.generic.return_code = MPI_ERR_NAME;
                 }
                 else
                 {
-                    process->SetName ( 
-                    
MyNode->NormalizeName(msg->u.request.u.startup.process_name) );
+                    strId_t progStrId = MyNode->GetStringId( 
msg->u.request.u.startup.program );
+                    strId_t nullStrId = { -1, -1 };
+                    process =
+                        new CProcess( NULL, nid, 
msg->u.request.u.startup.os_pid, ProcessType_Generic, 0, 0, false, true, (char 
*) "", 
+                        nullStrId, nullStrId, progStrId, (char *) "", (char *) 
"" );
+                    if ( process == NULL )
+                    {
+                        //TODO: Log event
+                        abort();
+                    }
+                    if ( process )
+                    {
+                        char user_argv[MAX_ARGS][MAX_ARG_SIZE];
+                        process->userArgs( 0, user_argv );
+                    }
+                    if ( msg->u.request.u.startup.process_name[0] == '\0' )
+                    {   // Create a name for the process and place it in the
+                        // Name member of the process object);
+                        char pname[MAX_KEY_NAME];
+                        MyNode->BuildOurName( nid, process->GetPid( ), pname );
+                        process->SetName( pname );
+                    }
+                    else
+                    {
+                        process->SetName( 
+                            MyNode->NormalizeName( 
msg->u.request.u.startup.process_name ) );
+                    }
+                    process->SetAttached( true );
+                    process->SetupFifo( process->GetNid( ), 
msg->u.request.u.startup.os_pid );
+                    process->SetCreationTime( msg->u.request.u.startup.os_pid 
);
+                    process->SetVerifier( );
+                    AddToList( process );
+                    process->CompleteProcessStartup( 
msg->u.request.u.startup.port_name,
+                                                     
msg->u.request.u.startup.os_pid,
+                                                     
msg->u.request.u.startup.event_messages,
+                                                     
msg->u.request.u.startup.system_messages,
+                                                     false,
+                                                     NULL );
+
+                    msg->u.reply.type = ReplyType_Startup;
+                    msg->u.reply.u.startup_info.nid = process->GetNid( );
+                    msg->u.reply.u.startup_info.pid = process->GetPid( );
+                    msg->u.reply.u.startup_info.verifier = 
process->GetVerifier( );
+                    strcpy( msg->u.reply.u.startup_info.process_name, 
process->GetName( ) );
+                    msg->u.reply.u.startup_info.return_code = MPI_SUCCESS;
+                    STRCPY( msg->u.reply.u.startup_info.fifo_stdin,
+                            process->fifo_stdin() );
+                    STRCPY( msg->u.reply.u.startup_info.fifo_stdout,
+                            process->fifo_stdout() );
+                    STRCPY( msg->u.reply.u.startup_info.fifo_stderr,
+                            process->fifo_stderr() );
+
+                    Monitor->writeProcessMapBegin( process->GetName( )
+                                                 , process->GetNid( )
+                                                 , process->GetPid( )
+                                                 , process->GetVerifier( )
+                                                 , -1, -1, -1
+                                                 , 
msg->u.request.u.startup.program );
                 }
-                process->SetAttached ( true );
-                process->SetupFifo(process->GetNid(), 
msg->u.request.u.startup.os_pid);
-                process->SetCreationTime(msg->u.request.u.startup.os_pid);
-                process->SetVerifier();
-                AddToList( process );
-                process->CompleteProcessStartup ( 
msg->u.request.u.startup.port_name,
-                                                  
msg->u.request.u.startup.os_pid,
-                                                  
msg->u.request.u.startup.event_messages,
-                                                  
msg->u.request.u.startup.system_messages,
-                                                  false,
-                                                  NULL );
-
-                msg->u.reply.type = ReplyType_Startup;
-                msg->u.reply.u.startup_info.nid = process->GetNid();
-                msg->u.reply.u.startup_info.pid = process->GetPid();
-                msg->u.reply.u.startup_info.verifier = process->GetVerifier();
-                strcpy (msg->u.reply.u.startup_info.process_name, 
process->GetName());
-                msg->u.reply.u.startup_info.return_code = MPI_SUCCESS;
-                STRCPY(msg->u.reply.u.startup_info.fifo_stdin,
-                       process->fifo_stdin());
-                STRCPY(msg->u.reply.u.startup_info.fifo_stdout,
-                       process->fifo_stdout());
-                STRCPY(msg->u.reply.u.startup_info.fifo_stderr,
-                       process->fifo_stderr());
-
-                Monitor->writeProcessMapBegin( process->GetName()
-                                             , process->GetNid()
-                                             , process->GetPid()
-                                             , process->GetVerifier()
-                                             , -1, -1, -1
-                                             , 
msg->u.request.u.startup.program );
             }
             else
             {
-                snprintf(la_buf, sizeof(la_buf),
-                         "[%s], Can't attach, node is a spare node!\n",
-                         method_name);
-                mon_log_write(MON_PROCESSCONT_ATTACHPCHECK_3, SQ_LOG_ERR, 
la_buf);
+                snprintf( la_buf, sizeof(la_buf),
+                        "[%s], Can't attach, node is a spare node!\n",
+                        method_name );
+                mon_log_write( MON_PROCESSCONT_ATTACHPCHECK_3, SQ_LOG_ERR, 
la_buf );
 
                 msg->u.reply.type = ReplyType_Startup;
                 msg->u.reply.u.startup_info.nid = -1;
@@ -3930,10 +3957,10 @@ void CProcessContainer::AttachProcessCheck ( struct message_def *msg )
         else
         {
             // Find the duplicate process
-            snprintf(la_buf, sizeof(la_buf),
+            snprintf( la_buf, sizeof(la_buf),
                      "[%s], Can't attach duplicate process %s!\n",
-                     method_name, msg->u.request.u.startup.process_name);
-            mon_log_write(MON_PROCESSCONT_ATTACHPCHECK_4, SQ_LOG_ERR, la_buf);
+                     method_name, msg->u.request.u.startup.process_name );
+            mon_log_write( MON_PROCESSCONT_ATTACHPCHECK_4, SQ_LOG_ERR, la_buf 
);
 
             msg->u.reply.type = ReplyType_Generic;
             msg->u.reply.u.generic.nid = -1;
@@ -3941,7 +3968,7 @@ void CProcessContainer::AttachProcessCheck ( struct message_def *msg )
             msg->u.reply.u.generic.verifier = -1;
             msg->u.reply.u.generic.process_name[0] = '\0';
             msg->u.reply.u.generic.return_code = MPI_ERR_NAME;
-        } 
+        }
     }  
     // complete a monitor child process startup
     else

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/redirector.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/redirector.cxx b/core/sqf/monitor/linux/redirector.cxx
index b3780cc..de27211 100644
--- a/core/sqf/monitor/linux/redirector.cxx
+++ b/core/sqf/monitor/linux/redirector.cxx
@@ -1288,7 +1288,7 @@ void CRedirectStderr::handleOutput(ssize_t count, char *buffer)
                 buf[size-1] = '\n';
             }
         }
-        mon_log_write(MON_REDIR_STDERR, SQ_LOG_INFO, buf);
+        mon_log_write(MON_REDIR_STDERR, SQ_LOG_DEBUG, buf);
 
         delete [] buf;
     }
@@ -1764,7 +1764,7 @@ void CRedirector::stdinOn(int fd)
     TRACE_EXIT;
 }
 
-void CRedirector::tryShutdownPipeFd(int pid, int fd)
+void CRedirector::tryShutdownPipeFd(int pid, int fd, bool pv_delete_redirect)
 {
     const char method_name[] = "CRedirector::tryShutdownPipeFd";
     TRACE_ENTRY;
@@ -1784,9 +1784,12 @@ void CRedirector::tryShutdownPipeFd(int pid, int fd)
         redirect = iter->second;
 
         // bugcatcher, temp call
-        redirect->validateObj();
+        if (redirect->pid() != 0)
+            redirect->validateObj();
 
-        if (!redirect->active() && (pid == redirect->pid()))
+        if (((pv_delete_redirect) ||
+             (!redirect->active())) &&
+            (pid == redirect->pid()))
         {
             if (trace_settings & TRACE_REDIRECTION)
                 trace_printf("%s@%d invoking shutdownPipeFd for fd=%d\n",

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/redirector.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/redirector.h b/core/sqf/monitor/linux/redirector.h
index 1babca6..2bea30a 100644
--- a/core/sqf/monitor/linux/redirector.h
+++ b/core/sqf/monitor/linux/redirector.h
@@ -254,7 +254,7 @@ public:
     void stdinOff(int fd);
     void stdinOn(int fd);
 
-    void tryShutdownPipeFd(int pid, int fd);
+    void tryShutdownPipeFd(int pid, int fd, bool pv_delete_redirect);
 
     void disposeIoData(int fd, int count, char *buffer);
 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/reqexit.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/reqexit.cxx b/core/sqf/monitor/linux/reqexit.cxx
index 4cfdec5..8a487e8 100644
--- a/core/sqf/monitor/linux/reqexit.cxx
+++ b/core/sqf/monitor/linux/reqexit.cxx
@@ -96,6 +96,8 @@ void CExtExitReq::populateRequestString( void )
 void CExtExitReq::performRequest()
 {
     bool status = FAILURE;
+    int target_nid = -1;
+    CLNode *target_lnode = NULL;
 
     const char method_name[] = "CExtExitReq::performRequest";
     TRACE_ENTRY;
@@ -115,8 +117,9 @@ void CExtExitReq::performRequest()
                     , msg_->u.request.u.exit.verifier );
     }
 
-    if ((msg_->u.request.u.exit.nid < 0) ||
-        (msg_->u.request.u.exit.nid >= Nodes->GetLNodesConfigMax()))
+    target_nid = msg_->u.request.u.exit.nid;
+    target_lnode = Nodes->GetLNode( target_nid );
+    if ( target_lnode == NULL )
     {
         char buf[MON_STRING_BUF_SIZE];
         sprintf(buf, "[CMonitor::ExitProcess], Invalid Node ID!\n");

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/reqnewproc.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/reqnewproc.cxx b/core/sqf/monitor/linux/reqnewproc.cxx
index 7cd35ea..afe1f38 100644
--- a/core/sqf/monitor/linux/reqnewproc.cxx
+++ b/core/sqf/monitor/linux/reqnewproc.cxx
@@ -86,6 +86,7 @@ void CExtNewProcReq::performRequest()
     CProcess *process = NULL;
     CNode *node = NULL;
     CLNode *lnode = NULL;
+    CLNode *target_lnode = NULL;
     CLNode *zone_lnode = NULL;
     char la_buf[MON_STRING_BUF_SIZE];
     int result;
@@ -116,10 +117,10 @@ void CExtNewProcReq::performRequest()
     if ( requester )
     {
         target_nid = msg_->u.request.u.new_process.nid;
+        target_lnode = Nodes->GetLNode( target_nid );
         if ( msg_->u.request.u.new_process.type == ProcessType_SSMP ) 
         {
-            if (( msg_->u.request.u.new_process.nid < 0  ||
-                  msg_->u.request.u.new_process.nid >= 
Nodes->GetLNodesConfigMax() )   )
+            if ( target_lnode == NULL )
             {
                 // Nid must be specified
                 msg_->u.reply.type = ReplyType_NewProcess;
@@ -150,8 +151,7 @@ void CExtNewProcReq::performRequest()
         }
         if ( msg_->u.request.u.new_process.type == ProcessType_DTM )
         {
-            if (( msg_->u.request.u.new_process.nid < 0  ||
-                  msg_->u.request.u.new_process.nid >= 
Nodes->GetLNodesConfigMax() )   )
+            if ( target_lnode == NULL )
             {
                 // Nid must be specified
                 msg_->u.reply.type = ReplyType_NewProcess;
@@ -189,8 +189,7 @@ void CExtNewProcReq::performRequest()
         }
         if ( msg_->u.request.u.new_process.type == ProcessType_SPX ) 
         {
-            if (( msg_->u.request.u.new_process.nid < 0  ||
-                  msg_->u.request.u.new_process.nid >= 
Nodes->GetLNodesConfigMax() )   )
+            if ( target_lnode == NULL )
             {
                 // Nid must be specified
                 msg_->u.reply.type = ReplyType_NewProcess;
@@ -350,9 +349,7 @@ void CExtNewProcReq::performRequest()
                 }
             }
         }
-        else if (( msg_->u.request.u.new_process.type == ProcessType_DTM       
  ) &&
-                 (( msg_->u.request.u.new_process.nid < 0                    ) 
||
-                  ( msg_->u.request.u.new_process.nid >= 
Nodes->GetLNodesConfigMax() )   )   )
+        else if ( target_lnode == NULL )
         {
             msg_->u.reply.type = ReplyType_NewProcess;
             msg_->u.reply.u.new_process.return_code = MPI_ERR_SPAWN;
@@ -365,21 +362,6 @@ void CExtNewProcReq::performRequest()
     
             return;
         }
-        else if (( msg_->u.request.u.new_process.type != ProcessType_DTM       
  ) &&
-                 (( msg_->u.request.u.new_process.nid < 0                    ) 
||
-                  ( msg_->u.request.u.new_process.nid >= 
Nodes->GetLNodesConfigMax() )   )   )
-        {
-            msg_->u.reply.type = ReplyType_NewProcess;
-            msg_->u.reply.u.new_process.return_code = MPI_ERR_SPAWN;
-            // Send reply to requester
-            lioreply(msg_, pid_);
-
-            sprintf(la_buf, "[%s], Invalid Node ID (%d).\n", method_name,
-                    target_nid);
-            mon_log_write(MON_MONITOR_STARTPROCESS_7, SQ_LOG_ERR, la_buf);
-
-            return;
-        }
         else
         {
             if( msg_->u.request.u.new_process.backup )

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/reqopen.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/reqopen.cxx b/core/sqf/monitor/linux/reqopen.cxx
index 56b13b2..494a0e3 100644
--- a/core/sqf/monitor/linux/reqopen.cxx
+++ b/core/sqf/monitor/linux/reqopen.cxx
@@ -189,13 +189,17 @@ bool CExtOpenReq::prepare()
     const char method_name[] = "CExtOpenReq::prepare";
     TRACE_ENTRY;
 
+    int target_nid = -1;
+    CLNode *target_lnode = NULL;
+
     if ( prepared_ == true )
     {   // Already did the prepare work earlier.
         return true;
     }
 
-    if ((msg_->u.request.u.open.nid < 0) ||
-        (msg_->u.request.u.open.nid >= Nodes->GetLNodesConfigMax()))
+    target_nid = msg_->u.request.u.open.nid;
+    target_lnode = Nodes->GetLNode( target_nid );
+    if ( target_lnode == NULL )
     {
         char buf[MON_STRING_BUF_SIZE];
         sprintf(buf, "%s, Invalid Node ID (%d)\n", method_name,

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/reqqueue.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/reqqueue.cxx b/core/sqf/monitor/linux/reqqueue.cxx
index 764966f..becb0cd 100644
--- a/core/sqf/monitor/linux/reqqueue.cxx
+++ b/core/sqf/monitor/linux/reqqueue.cxx
@@ -1971,6 +1971,11 @@ CIntDownReq::CIntDownReq( int pnid )
 {
     // Add eyecatcher sequence as a debugging aid
     memcpy(&eyecatcher_, "RQIP", 4);
+
+    if ( pnid == MyPNID )
+    {
+        SetReviveFlag(1); // allow this request to be processed during revive
+    }
 }
 
 CIntDownReq::~CIntDownReq()

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/shell.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/shell.cxx b/core/sqf/monitor/linux/shell.cxx
index 7bbc6e3..5037dd3 100644
--- a/core/sqf/monitor/linux/shell.cxx
+++ b/core/sqf/monitor/linux/shell.cxx
@@ -44,6 +44,7 @@ using namespace std;
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <string> 
 
 #include "msgdef.h"
 #include "props.h"
@@ -541,6 +542,102 @@ bool update_cluster_state( bool displayState, bool checkSpareColdStandby = true
     return( true );
 }
 
+bool update_node_state( char *nodeName, bool checkSpareColdStandby = true )
+{ // Re-query the state of one physical node (nodeName) and update NodeState[]/NumDown.
+    if ( strlen(nodeName) == 0 )
+    { // Empty name: nothing to look up. NOTE(review): a NULL nodeName is not guarded here.
+        return( false );
+    }
+
+    int rc, rc2;
+    char pnodename[MPI_MAX_PROCESSOR_NAME];
+    CPhysicalNode  *physicalNode;
+    PhysicalNodeNameMap_t::iterator it;
+    CCmsh cmshcmd( "sqnodestatus" );
+
+    strncpy(pnodename, nodeName, MPI_MAX_PROCESSOR_NAME);
+    pnodename[MPI_MAX_PROCESSOR_NAME-1] = '\0'; // strncpy does not terminate when the source fills the buffer
+
+    // Look up the (possibly truncated) name in PhysicalNodeMap
+    it = PhysicalNodeMap.find( pnodename );
+
+    if (it != PhysicalNodeMap.end())
+    {
+        physicalNode = it->second;
+    }
+    else
+    {
+        printf( "[%s] Error: Internal error while looking up physical node 
map, node name does not exist, node name=%s\n", MyName, pnodename );
+        return( false );
+    }
+
+    // Save stdin, close it while the ssh command runs, then restore it:
+    // ssh, by design, would otherwise consume the contents of stdin.
+    int savedStdIn = dup(STDIN_FILENO);
+    if ( savedStdIn == -1 )
+    {
+        fprintf(stderr, "[%s] Error: dup() failed for STDIN_FILENO: %s 
(%d)\n", MyName, strerror(errno), errno );
+        exit(1);
+    }
+    close(STDIN_FILENO);
+
+    rc = cmshcmd.GetNodeState( nodeName, physicalNode );
+    rc2 = dup2(savedStdIn, STDIN_FILENO); // put the saved stdin back on fd 0
+    if ( rc2 == -1 )
+    {
+        fprintf(stderr, "[%s] Error: dup2() failed for STDIN_FILENO: %s 
(%d)\n", MyName, strerror(errno), errno );
+        exit(1);
+    }
+    close(savedStdIn);
+
+    if ( rc == -1 )
+    { // GetNodeState() returned -1: give up without touching NodeState[]
+        return( false );
+    }
+
+    NodeState_t nodeState;
+    CPNodeConfig *pnodeConfig = ClusterConfig.GetPNodeConfig( nodeName );
+    if ( pnodeConfig )
+    {
+        if ( get_pnode_state( PNode[pnodeConfig->GetPNid()], nodeState ) )
+        {
+            if ( nodeState == StateUp )
+            {
+                if ( checkSpareColdStandby && SpareNodeColdStandby )
+                {
+                    if ( pnodeConfig  && pnodeConfig->IsSpareNode() )
+                    { // An up spare node is recorded as down when both flags above are set
+                        ++NumDown;
+                        NodeState[pnodeConfig->GetPNid()] = false;
+                        nodeState = StateDown;
+                        set_pnode_state( PNode[pnodeConfig->GetPNid()], 
nodeState );
+                    }
+                    else
+                    {
+                        NodeState[pnodeConfig->GetPNid()] = true;
+                    }
+                }
+                else
+                {
+                    NodeState[pnodeConfig->GetPNid()] = true;
+                }
+            }
+            else
+            {
+                NodeState[pnodeConfig->GetPNid()] = false;
+                ++NumDown;
+            }
+        } // if get_pnode_state() returns false, nothing is updated and true is still returned
+    }
+    else
+    {
+        printf( "[%s] Physical node configuration does not exist, node 
name=%s\n", MyName, nodeName );
+        return( false );
+    }
+    
+    return( true );
+}
+
 int mon_log_write(int pv_event_type, posix_sqlog_severity_t pv_severity, char 
*pp_string)
 {
     pv_event_type = pv_event_type;
@@ -3770,8 +3867,8 @@ int node_up( int nid, char *node_name, bool nowait )
     // If this is a real cluster
     if ( nid == -1 )
     {
-        // Get current physical state of all nodes
-        if ( !update_cluster_state( true, false ) )
+        // Get current physical state of target nodes
+        if ( !update_node_state( node_name, false ) )
         {
             return( rc ) ;
         }

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/tcdbsqlite.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/tcdbsqlite.cxx b/core/sqf/monitor/linux/tcdbsqlite.cxx
index d53f602..ff18cdd 100644
--- a/core/sqf/monitor/linux/tcdbsqlite.cxx
+++ b/core/sqf/monitor/linux/tcdbsqlite.cxx
@@ -2507,10 +2507,6 @@ int CTcdbSqlite::GetUniqueString( int nid, int id, const char *uniqStr )
         }
         else
         {
-            if ( prepStmt != NULL )
-            {
-                sqlite3_finalize( prepStmt );
-            }
             char buf[TC_LOG_BUF_SIZE];
             snprintf( buf, sizeof(buf)
                     , "[%s] (%s) failed, nid=%d, id=%d, error: %s\n"
@@ -2631,10 +2627,6 @@ int CTcdbSqlite::GetUniqueStringId( int nid
         }
         else
         {
-            if ( prepStmt != NULL )
-            {
-                sqlite3_finalize( prepStmt );
-            }
             char buf[TC_LOG_BUF_SIZE];
             snprintf( buf, sizeof(buf)
                     , "[%s] (%s) failed, nid=%d, id=%d, error: %s\n"

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/tmsync.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/tmsync.cxx b/core/sqf/monitor/linux/tmsync.cxx
index 3e72241..60d9f40 100644
--- a/core/sqf/monitor/linux/tmsync.cxx
+++ b/core/sqf/monitor/linux/tmsync.cxx
@@ -661,31 +661,41 @@ void CTmSync_Container::ProcessTmSyncReply( struct message_def * msg )
         if (trace_settings & (TRACE_REQUEST | TRACE_TMSYNC))
             trace_printf("%s@%d - Unsolicited TmSync reply, handle=%d\n",
                          method_name, __LINE__, tmsync_req->Handle);
-        tmsync_req->Completed = true;
-        UnsolicitedComplete( msg );
-        if ( msg->u.reply.u.unsolicited_tm_sync.return_code != MPI_SUCCESS )
+        if (msg->u.reply.u.unsolicited_tm_sync.return_code == MPI_SUCCESS)
         {
             TmSyncReplyCode |= msg->u.reply.u.unsolicited_tm_sync.return_code;
-        }
-        if ( TmSyncPNid == MyPNID )
-        {
-            if (trace_settings & (TRACE_REQUEST | TRACE_TMSYNC))
-                trace_printf("%s@%d - Local Unsolicited TmSync reply, handle="
-                             "%d\n", method_name, __LINE__,
-                             tmsync_req->Handle);
-            if ( GetTmSyncReplies() == GetTotalSlaveTmSyncCount() )
+            tmsync_req->Completed = true;
+            UnsolicitedComplete( msg );
+            if ( TmSyncPNid == MyPNID )
             {
-                UpdateTmSyncState( TmSyncReplyCode );
-                UnsolicitedCompleteDone();
+                if (trace_settings & (TRACE_REQUEST | TRACE_TMSYNC))
+                    trace_printf("%s@%d - Local Unsolicited TmSync reply, handle="
+                                 "%d\n", method_name, __LINE__,
+                                 tmsync_req->Handle);
+                if ( GetTmSyncReplies() == GetTotalSlaveTmSyncCount() )
+                {
+                    UpdateTmSyncState( TmSyncReplyCode );
+                    UnsolicitedCompleteDone();
+                }
             }
-        }
-        else
-        {
-            if ( GetTmSyncReplies() == GetTotalSlaveTmSyncCount() )
+            else
             {
-                CommitTmDataBlock(TmSyncReplyCode);
+                if ( GetTmSyncReplies() == GetTotalSlaveTmSyncCount() )
+                {
+                    CommitTmDataBlock(TmSyncReplyCode);
+                }
             }
         }
+        else
+        { // The Seabed callback has not been registered, try again
+            if (trace_settings & (TRACE_REQUEST | TRACE_TMSYNC))
+                trace_printf("%s@%d - Retrying Local Unsolicited TmSync, handle="
+                             "%d\n", method_name, __LINE__,
+                             tmsync_req->Handle);
+            PendingSlaveTmSyncCount--;
+            tmsync_req->Completed = false;
+            SendUnsolicitedMessages();
+        }
     }
     else
     {

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/monitor/linux/zclient.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/zclient.cxx b/core/sqf/monitor/linux/zclient.cxx
index 23dca8a..36a0600 100644
--- a/core/sqf/monitor/linux/zclient.cxx
+++ b/core/sqf/monitor/linux/zclient.cxx
@@ -650,7 +650,7 @@ void CZClient::HandleExpiredZNode( void )
     
         monZnode.assign( znodeQueue_.front() );
 
-        if (trace_settings)
+        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
         {
             trace_printf("%s@%d" " - znodePath=%s, znodeQueue_.size=%ld\n"
                         , method_name, __LINE__
@@ -659,10 +659,6 @@ void CZClient::HandleExpiredZNode( void )
 
         znodeQueue_.pop_front();
         
-        trace_printf( "%s@%d" " - Checking znode=%s\n"
-                    , method_name, __LINE__
-                    , monZnode.c_str() );
-
         strcpy( pathStr, monZnode.c_str() );
 
         tknStart++; // skip the first '/'

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/sql/scripts/sqnodestatus
----------------------------------------------------------------------
diff --git a/core/sqf/sql/scripts/sqnodestatus b/core/sqf/sql/scripts/sqnodestatus
index 44dc93f..56511fc 100755
--- a/core/sqf/sql/scripts/sqnodestatus
+++ b/core/sqf/sql/scripts/sqnodestatus
@@ -34,25 +34,34 @@ my $node_context=readpipe("trafconf -name");
 my %node_hash=();
 my $sq_mon_ssh_options=readpipe("echo -n \$SQ_MON_SSH_OPTIONS");
 my $json=$ARGV[0];
+my $node_name=$ARGV[1];
 
 &main();
 
 sub main()
 {
-  #$node_context=~s/-w//ig;
-  #print "node_context=${node_context}";
-  chomp($node_context);
-  my @nodes=split(' ',$node_context);
-  foreach my $node(@nodes)
+  #print "json=${json}\n";
+  #print "node_name=${node_name}\n";
+  if ($ARGV[0] ne '-n')
   {
-     $check_flag=check_node_status($node);
+     #print "node_context=${node_context}";
+     chomp($node_context);
+     my @nodes=split(' ',$node_context);
+     foreach my $node(@nodes)
+     {
+        $check_flag=check_node_status($node);
+     }
+  }
+  else
+  {
+     $check_flag=check_node_status($node_name);
   }
   print_node_status();
 }
 
 sub print_node_status()
 {
-    if ($json) 
+    if ($json eq '-json')
     {
         $comma="";
         print "[";

http://git-wip-us.apache.org/repos/asf/trafodion/blob/e832d827/core/sqf/src/seabed/src/msmon.cpp
----------------------------------------------------------------------
diff --git a/core/sqf/src/seabed/src/msmon.cpp b/core/sqf/src/seabed/src/msmon.cpp
index 311e099..cad7146 100644
--- a/core/sqf/src/seabed/src/msmon.cpp
+++ b/core/sqf/src/seabed/src/msmon.cpp
@@ -6609,7 +6609,7 @@ void msg_mon_recv_unsol_msg_loc_cbt(Mon_Msg_Type *pp_msg, int) {
     } else {
         if (gv_ms_trace_mon)
             trace_where_printf(WHERE, "no tmsync callback, replying with error\n");
-        lv_handle = -1;
+        lv_handle = pp_msg->u.request.u.unsolicited_tm_sync.handle;
         lv_cbret = 1; // set error
     }
     lv_err = gp_local_mon_io->acquire_msg(&lp_msg);

Reply via email to