Fixes to Name Server replication logic.
Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/1f7eabb0 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/1f7eabb0 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/1f7eabb0 Branch: refs/heads/master Commit: 1f7eabb0b8327e07eda4727071f8dcda1d307df8 Parents: 3931a75 Author: Zalo Correa <[email protected]> Authored: Wed May 16 19:12:26 2018 -0700 Committer: Zalo Correa <[email protected]> Committed: Wed May 16 19:12:26 2018 -0700 ---------------------------------------------------------------------- .../export/include/common/evl_sqlog_eventnum.h | 1 + core/sqf/export/include/trafconf/trafconfig.h | 2 - core/sqf/monitor/linux/cluster.cxx | 65 +++- core/sqf/monitor/linux/cluster.h | 2 + core/sqf/monitor/linux/internal.h | 22 +- core/sqf/monitor/linux/monitor.cxx | 188 ++++++++-- core/sqf/monitor/linux/msgdef.h | 36 +- core/sqf/monitor/linux/nameserver.cxx | 96 ++++- core/sqf/monitor/linux/nscommacceptmon.cxx | 56 ++- core/sqf/monitor/linux/nsprocess.cxx | 29 +- core/sqf/monitor/linux/nsreqnewproc.cxx | 9 +- core/sqf/monitor/linux/nsreqprocinfons.cxx | 30 +- core/sqf/monitor/linux/pnode.cxx | 25 +- core/sqf/monitor/linux/pnode.h | 2 + core/sqf/monitor/linux/process.cxx | 76 +++- core/sqf/monitor/linux/process.h | 36 +- core/sqf/monitor/linux/replicate.cxx | 130 ++++++- core/sqf/monitor/linux/replicate.h | 10 + core/sqf/monitor/linux/reqqueue.cxx | 357 ++++++++++++++++--- core/sqf/monitor/linux/reqqueue.h | 128 +++++-- core/sqf/monitor/linux/zclient.cxx | 5 +- 21 files changed, 1079 insertions(+), 226 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/export/include/common/evl_sqlog_eventnum.h ---------------------------------------------------------------------- diff --git a/core/sqf/export/include/common/evl_sqlog_eventnum.h b/core/sqf/export/include/common/evl_sqlog_eventnum.h index 2d630c9..1f4d166 100644 --- a/core/sqf/export/include/common/evl_sqlog_eventnum.h +++ b/core/sqf/export/include/common/evl_sqlog_eventnum.h @@ -738,6 +738,7 @@ #define MON_INTREQ_EXIT_1 101181701 #define MON_INTREQ_NEWPROC_1 101181801 #define MON_INTREQ_NEWPROC_2 101181802 +#define MON_INTREQ_NEWPROC_3 101181803 #define MON_INTREQ_NOTIFY_1 101181901 #define MON_INTREQ_NOTIFY_2 101181902 #define MON_INTREQ_NOTIFY_3 101181903 http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/export/include/trafconf/trafconfig.h ---------------------------------------------------------------------- diff --git a/core/sqf/export/include/trafconf/trafconfig.h b/core/sqf/export/include/trafconf/trafconfig.h index 2eba1c8..4661840 100644 --- a/core/sqf/export/include/trafconf/trafconfig.h +++ b/core/sqf/export/include/trafconf/trafconfig.h @@ -111,7 +111,6 @@ typedef enum { } TcZoneType_t; typedef enum { -//enum TC_STORAGE_TYPE { TCDBSTOREUNDEFINED = 0 , TCDBMYSQL = 1 // MySQL Database , TCDBPOSTGRESQL = 2 // PostgresQL Database [TBD] @@ -119,7 +118,6 @@ typedef enum { } TcStorageType_t; typedef enum { -//enum TC_ERRORS { TCSUCCESS = 0 // Successful operation , TCNOTIMPLEMENTED = -1 // Not implemented , TCNOTINIT = -2 // Database not open http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/cluster.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx index a7ccdf1..8768d2e 100644 --- a/core/sqf/monitor/linux/cluster.cxx +++ b/core/sqf/monitor/linux/cluster.cxx @@ -863,6 +863,8 @@ CCluster::CCluster (void) ,myMonConnCount_(0) ,minMonConnCount_(0) ,minMonConnPnid_(-1) +#else + ,clusterProcCount_(0) #endif { int i; @@ -2644,7 +2646,12 @@ void CCluster::HandleOtherNodeMsg (struct internal_msg_def *recv_msg, if ( MyNode->IsMyNode(recv_msg->u.process.nid) ) { // Need to create process on this node. // Queue process creation request for handling by worker thread +#ifdef NAMESERVER_PROCESS + ReqQueue.enqueueNewProcNsReq( &recv_msg->u.process ); +#endif +#ifndef NAMESERVER_PROCESS ReqQueue.enqueueNewProcReq( &recv_msg->u.process ); +#endif } break; @@ -7056,6 +7063,8 @@ void CCluster::UpdateClusterState( bool &doShutdown, } #ifdef NAMESERVER_PROCESS nodestate[index].monConnCount = -1; +#else + nodestate[index].monProcCount = 0; #endif continue; @@ -7081,6 +7090,8 @@ void CCluster::UpdateClusterState( bool &doShutdown, nodestate[index].nodeMask = recvBuf->nodeInfo.nodeMask; #ifdef NAMESERVER_PROCESS nodestate[index].monConnCount = recvBuf->nodeInfo.monConnCount; +#else + nodestate[index].monProcCount = recvBuf->nodeInfo.monProcCount; #endif for ( int i =0; i < MAX_NODE_MASKS ; i++ ) @@ -7199,6 +7210,8 @@ void CCluster::UpdateClusterState( bool &doShutdown, nodestate[MyPNID].nodeMask = upNodes_; #ifdef NAMESERVER_PROCESS nodestate[MyPNID].monConnCount = Node[MyPNID]->GetMonConnCount(); +#else + nodestate[MyPNID].monProcCount = Node[MyPNID]->GetNumProcs(); #endif // Examine status returned from MPI receive requests @@ -7263,6 +7276,8 @@ void CCluster::UpdateClusterState( bool &doShutdown, } #ifdef NAMESERVER_PROCESS nodestate[index].monConnCount = -1; +#else + nodestate[index].monProcCount = 0; #endif if ( validateNodeDown_ ) @@ -7466,6 +7481,15 @@ void CCluster::UpdateClusterState( bool &doShutdown, myMonConnCount_ = nodestate[MyPNID].monConnCount; minMonConnCount_ = minConnCount; minMonConnPnid_ = minConnPnid; +#else + if (NameServerEnabled) + { + clusterProcCount_ = 0; + for (int index = 0; index < GetConfigPNodesMax(); index++) + { + clusterProcCount_ += nodestate[index].monProcCount; + } + } #endif TRACE_EXIT; @@ -7667,17 +7691,32 @@ bool CCluster::checkIfDone ( ) nameServerCount ); #else - if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) - trace_printf("%s@%d - Node %d shutdown level=%d, state=%s. Process " - "count=%d, internal state=%d, currentNodes_=%d, " - "local process count=%d\n", - method_name, __LINE__, MyNode->GetPNid(), - MyNode->GetShutdownLevel(), - StateString(MyNode->GetState()), - Nodes->ProcessCount(), - MyNode->getInternalState(), - currentNodes_, MyNode->GetNumProcs()); - + if (NameServerEnabled) + { + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) + trace_printf("%s@%d - Node %d shutdown level=%d, state=%s. Cluster process " + "count=%d, internal state=%d, currentNodes_=%d, " + "local process count=%d\n", + method_name, __LINE__, MyNode->GetPNid(), + MyNode->GetShutdownLevel(), + StateString(MyNode->GetState()), + clusterProcCount_, + MyNode->getInternalState(), + currentNodes_, MyNode->GetNumProcs()); + } + else + { + if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) + trace_printf("%s@%d - Node %d shutdown level=%d, state=%s. Process " + "count=%d, internal state=%d, currentNodes_=%d, " + "local process count=%d\n", + method_name, __LINE__, MyNode->GetPNid(), + MyNode->GetShutdownLevel(), + StateString(MyNode->GetState()), + Nodes->ProcessCount(), + MyNode->getInternalState(), + currentNodes_, MyNode->GetNumProcs()); + } #endif // Check if we are also done if (( MyNode->GetState() != State_Down ) && @@ -7704,7 +7743,7 @@ bool CCluster::checkIfDone ( ) if ( NameServerEnabled ) { - if ( Nodes->ProcessCount() == 0 ) // all Name Servers exited + if ( clusterProcCount_ == 0 ) // all Name Servers exited { if (trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL | TRACE_SYNC)) trace_printf("%s@%d - Monitor signaled to exit.\n", method_name, __LINE__); @@ -7714,7 +7753,7 @@ bool CCluster::checkIfDone ( ) // we need to sync one more time so other nodes see our state return false; } - else if ( (Nodes->ProcessCount() <= + else if ( (clusterProcCount_ <= (currentNodes_ * (MAX_PRIMITIVES+1)) ) // only WDGs and Name Servers alive && (MyNode->GetNumProcs() <= (MAX_PRIMITIVES+1) ) // only WDGs and Name Servers alive http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/cluster.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/cluster.h b/core/sqf/monitor/linux/cluster.h index 9d0925f..1d7e1ac 100644 --- a/core/sqf/monitor/linux/cluster.h +++ b/core/sqf/monitor/linux/cluster.h @@ -388,6 +388,8 @@ private: int myMonConnCount_; int minMonConnCount_; int minMonConnPnid_; +#else + int clusterProcCount_; #endif int Allgather(int nbytes, void *sbuf, char *rbuf, int tag, MPI_Status *stats); http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/internal.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/internal.h b/core/sqf/monitor/linux/internal.h index 6329fc7..35fa32a 100644 --- a/core/sqf/monitor/linux/internal.h +++ b/core/sqf/monitor/linux/internal.h @@ -127,9 +127,15 @@ struct clone_def int argc; // number of command line arguments struct timespec creation_time; // process creation time +#ifdef NAMESERVER_PROCESS + int pathLen; + int ldpathLen; + int programLen; +#else strId_t pathStrId; // program lookup path (string id) strId_t ldpathStrId; // library load path (string id) strId_t programStrId; // full path to object file (string id) +#endif int nameLen; int portLen; @@ -296,7 +302,6 @@ struct node_name_def char new_name[MPI_MAX_PROCESSOR_NAME]; }; - struct notify_def { int nid; // Node id of process being notified @@ -309,7 +314,6 @@ struct notify_def _TM_Txid_External trans_id; // Associated TransID }; - struct process_def { bool backup; // true for a backup process @@ -328,12 +332,16 @@ struct process_def int pair_parent_pid; // process id of real process pair parent process Verifier_t pair_parent_verifier; // process id of real process pair parent process int argc; // number of command line arguments - + void *tag; // process tag +#ifdef NAMESERVER_PROCESS + int pathLen; + int ldpathLen; + int programLen; +#else strId_t pathStrId; // program lookup path (string id) strId_t ldpathStrId; // library load path (string id) strId_t programStrId; // full path to object file (string id) - - void *tag; // process tag +#endif int nameLen; int argvLen; int infileLen; @@ -341,8 +349,6 @@ struct process_def char stringData; // variable length string data }; - - struct process_init_def { int nid; // Node id of child process @@ -513,6 +519,8 @@ typedef struct cluster_state_def upNodes_t nodeMask; // Set of nodes currently "up" #ifdef NAMESERVER_PROCESS int monConnCount; // monitor connections +#else + int monProcCount; // number of processes #endif } cluster_state_def_t; http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/monitor.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/monitor.cxx b/core/sqf/monitor/linux/monitor.cxx index 17ed27d..6757f2c 100755 --- a/core/sqf/monitor/linux/monitor.cxx +++ b/core/sqf/monitor/linux/monitor.cxx @@ -606,9 +606,11 @@ char * CMonitor::ProcCopy(char *bufPtr, CProcess *process) procObj->priority = process->GetPriority(); procObj->backup = process->IsBackup(); procObj->unhooked = process->IsUnhooked(); +#ifndef NAMESERVER_PROCESS procObj->pathStrId = process->pathStrId(); procObj->ldpathStrId = process->ldPathStrId(); procObj->programStrId = process->programStrId(); +#endif procObj->os_pid = process->GetPid(); procObj->verifier = process->GetVerifier(); procObj->prior_pid = process->GetPriorPid (); @@ -658,6 +660,47 @@ char * CMonitor::ProcCopy(char *bufPtr, CProcess *process) procObj->portLen = 0; } +#ifdef NAMESERVER_PROCESS + if (strlen(process->path())) + { + // Copy the path + procObj->pathLen = strlen(process->path()) + 1; + memcpy(stringData, process->path(), procObj->pathLen ); + stringData += procObj->pathLen; + stringDataLen = procObj->pathLen; + } + else + { + procObj->pathLen = 0; + } + + if (strlen(process->ldpath())) + { + // Copy the ldpath + procObj->ldpathLen = strlen(process->ldpath()) + 1; + memcpy(stringData, process->ldpath(), procObj->ldpathLen ); + stringData += procObj->ldpathLen; + stringDataLen = procObj->ldpathLen; + } + else + { + procObj->ldpathLen = 0; + } + + if (strlen(process->program())) + { + // Copy the program + procObj->programLen = strlen(process->program()) + 1; + memcpy(stringData, process->program(), procObj->programLen ); + stringData += procObj->programLen; + stringDataLen = procObj->programLen; + } + else + { + procObj->programLen = 0; + } +#endif + if (process->IsPersistent()) { if (strlen(process->infile())) @@ -696,28 +739,6 @@ char * CMonitor::ProcCopy(char *bufPtr, CProcess *process) } procObj->persistent = true; - - if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY)) - trace_printf( "%s@%d - Packing process string data:\n" - " name(%d) =%s\n" - " port(%d) =%s\n" - " infile(%d) =%s\n" - " outfile(%d) =%s\n" - " userArgv(%d) =%s\n" - " stringData(%d) =%s\n" - , method_name, __LINE__ - , procObj->nameLen - , process->GetName() - , procObj->portLen - , process->GetPort() - , procObj->infileLen - , process->infile() - , procObj->outfileLen - , process->outfile() - , procObj->argvLen - , procObj->argvLen?process->userArgv():"" - , stringDataLen - , stringDataLen?&procObj->stringData:"" ); } else { @@ -727,6 +748,61 @@ char * CMonitor::ProcCopy(char *bufPtr, CProcess *process) procObj->persistent = false; } +#ifdef NAMESERVER_PROCESS + if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY)) + trace_printf( "%s@%d - Packing process string data:\n" + " name(%d) =%s\n" + " port(%d) =%s\n" + " path(%d) =%s\n" + " ldpath(%d) =%s\n" + " program(%d) =%s\n" + " infile(%d) =%s\n" + " outfile(%d) =%s\n" + " userArgv(%d) =%s\n" + " stringData(%d) =%s\n" + , method_name, __LINE__ + , procObj->nameLen + , process->GetName() + , procObj->portLen + , process->GetPort() + , procObj->pathLen + , process->path() + , procObj->ldpathLen + , process->ldpath() + , procObj->programLen + , process->program() + , procObj->infileLen + , process->infile() + , procObj->outfileLen + , process->outfile() + , procObj->argvLen + , procObj->argvLen?process->userArgv():"" + , stringDataLen + , stringDataLen?&procObj->stringData:"" ); +#else + if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY)) + trace_printf( "%s@%d - Packing process string data:\n" + " name(%d) =%s\n" + " port(%d) =%s\n" + " infile(%d) =%s\n" + " outfile(%d) =%s\n" + " userArgv(%d) =%s\n" + " stringData(%d) =%s\n" + , method_name, __LINE__ + , procObj->nameLen + , process->GetName() + , procObj->portLen + , process->GetPort() + , procObj->infileLen + , process->infile() + , procObj->outfileLen + , process->outfile() + , procObj->argvLen + , procObj->argvLen?process->userArgv():"" + , stringDataLen + , stringDataLen?&procObj->stringData:"" ); +#endif + TRACE_EXIT; return stringData; } @@ -802,6 +878,11 @@ void CMonitor::UnpackProcObjs( char *&buffer, int procCount ) CNode * node = NULL; CProcess * process = NULL; int stringDataLen; +#ifdef NAMESERVER_PROCESS + char *path = NULL; + char *ldpath = NULL; + char *program = NULL; +#endif char *name = NULL; char *port = NULL; char *infile = NULL; @@ -824,7 +905,7 @@ void CMonitor::UnpackProcObjs( char *&buffer, int procCount ) if (procObj->nameLen) { - name = &procObj->stringData; + name = &stringData[stringDataLen]; stringDataLen += procObj->nameLen; } @@ -834,6 +915,26 @@ void CMonitor::UnpackProcObjs( char *&buffer, int procCount ) stringDataLen += procObj->portLen; } +#ifdef NAMESERVER_PROCESS + if (procObj->pathLen) + { + path = &stringData[stringDataLen]; + stringDataLen += procObj->pathLen; + } + + if (procObj->ldpathLen) + { + ldpath = &stringData[stringDataLen]; + stringDataLen += procObj->ldpathLen; + } + + if (procObj->programLen) + { + program = &stringData[stringDataLen]; + stringDataLen += procObj->programLen; + } +#endif + if (procObj->infileLen) { infile = &stringData[stringDataLen]; @@ -852,6 +953,40 @@ void CMonitor::UnpackProcObjs( char *&buffer, int procCount ) stringDataLen += procObj->argvLen; } +#ifdef NAMESERVER_PROCESS + if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY)) + trace_printf( "%s@%d - Unpacking process string data:\n" + " stringData(%d) =%s\n" + " name(%d) =%s\n" + " port(%d) =%s\n" + " path(%d) =%s\n" + " ldpath(%d) =%s\n" + " program(%d) =%s\n" + " infile(%d) =%s\n" + " outfile(%d) =%s\n" + " userArgc =%d\n" + " userArgv(%d) =%s\n" + , method_name, __LINE__ + , stringDataLen + , stringDataLen?&procObj->stringData:"" + , procObj->nameLen + , procObj->nameLen?name:"" + , procObj->portLen + , procObj->portLen?port:"" + , procObj->pathLen + , procObj->pathLen?path:"" + , procObj->ldpathLen + , procObj->ldpathLen?ldpath:"" + , procObj->programLen + , procObj->programLen?program:"" + , procObj->infileLen + , procObj->infileLen?infile:"" + , procObj->outfileLen + , procObj->outfileLen?outfile:"" + , procObj->argc + , procObj->argvLen + , procObj->argvLen?userargv:"" ); +#else if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY)) trace_printf( "%s@%d - Unpacking process string data:\n" " stringData(%d) =%s\n" @@ -875,6 +1010,7 @@ void CMonitor::UnpackProcObjs( char *&buffer, int procCount ) , procObj->argc , procObj->argvLen , procObj->argvLen?userargv:"" ); +#endif process = node->CloneProcess (procObj->nid, procObj->type, @@ -890,9 +1026,15 @@ void CMonitor::UnpackProcObjs( char *&buffer, int procCount ) procObj->parent_verifier, procObj->event_messages, procObj->system_messages, +#ifdef NAMESERVER_PROCESS + path, + ldpath, + program, +#else procObj->pathStrId, procObj->ldpathStrId, procObj->programStrId, +#endif procObj->infileLen?infile:(char *)"", procObj->outfileLen?outfile:(char *)"", &procObj->creation_time, http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/msgdef.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/msgdef.h b/core/sqf/monitor/linux/msgdef.h index ca76fc1..639c15c 100644 --- a/core/sqf/monitor/linux/msgdef.h +++ b/core/sqf/monitor/linux/msgdef.h @@ -566,16 +566,17 @@ struct NewProcess_def struct NewProcessNs_def { - int parent_nid; // parent's node id - int parent_pid; // parent's process id - Verifier_t parent_verifier; // parent's process verifier - int pair_parent_nid; // node id of real process pair parent process - int pair_parent_pid; // process id of real process pair parent process - Verifier_t pair_parent_verifier; // process id of real process pair parent process int nid; // node id int pid; // process id Verifier_t verifier; // process verifier + char process_name[MAX_PROCESS_NAME]; // process name PROCESSTYPE type; // Identifies the process handling catagory + int parent_nid; // parent's node id + int parent_pid; // parent's process id + Verifier_t parent_verifier; // parent's process verifier + int pair_parent_nid; // node id of real process pair parent process + int pair_parent_pid; // process id of real process pair parent process + Verifier_t pair_parent_verifier; // process id of real process pair parent process int priority; // Linux system priority int debug; // if non-zero, starts processing using GDB int backup; // if non-zero, starts process as backup @@ -584,10 +585,12 @@ struct NewProcessNs_def bool event_messages; // true if want event messages bool system_messages; // true if want system messages long long tag; // user defined tag to be sent in completion notice - strId_t pathStrId; // program lookup path (string id) - strId_t ldpathStrId; // library load path (string id) - strId_t programStrId; // full path to object file (string id) - char process_name[MAX_PROCESS_NAME]; // process name +// strId_t pathStrId; // program lookup path (string id) +// strId_t ldpathStrId; // library load path (string id) +// strId_t programStrId; // full path to object file (string id) + char path[MAX_SEARCH_PATH]; // process's object lookup path to program + char ldpath[MAX_SEARCH_PATH]; // process's library load path for program + char program[MAX_PROCESS_PATH]; // full path to object file char port_name[MPI_MAX_PORT_NAME]; // mpi port name from MPI_Open_port int argc; // number of additional command line argument char argv[MAX_ARGS][MAX_ARG_SIZE]; // array of additional command line arguments @@ -978,6 +981,9 @@ struct ProcessInfoNs_reply_def int parent_nid; // parent's node id int parent_pid; // parent's process id Verifier_t parent_verifier; // parent's process verifier + int pair_parent_nid; // node id of real process pair parent process + int pair_parent_pid; // process id of real process pair parent process + Verifier_t pair_parent_verifier; // process id of real process pair parent process int priority; // Linux system priority int backup; // if non-zero, starts process as backup STATE state; // process's current state @@ -985,10 +991,12 @@ struct ProcessInfoNs_reply_def bool event_messages; // true if want event messages bool system_messages; // true if want system messages long long tag; // user defined tag to be sent in completion notice - char program[MAX_PROCESS_PATH]; // process's object file name - strId_t pathStrId; // program lookup path (string id) - strId_t ldpathStrId; // library load path (string id) - strId_t programStrId; // full path to object file (string id) +// strId_t pathStrId; // program lookup path (string id) +// strId_t ldpathStrId; // library load path (string id) +// strId_t programStrId; // full path to object file (string id) + char path[MAX_SEARCH_PATH]; // process's object lookup path to program + char ldpath[MAX_SEARCH_PATH]; // process's library load path for program + char program[MAX_PROCESS_PATH]; // program file name char port_name[MPI_MAX_PORT_NAME]; // mpi port name from MPI_Open_port int argc; // number of additional command line argument char argv[MAX_ARGS][MAX_ARG_SIZE]; // array of additional command line arguments http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/nameserver.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nameserver.cxx b/core/sqf/monitor/linux/nameserver.cxx index 690e23d..e9f1900 100644 --- a/core/sqf/monitor/linux/nameserver.cxx +++ b/core/sqf/monitor/linux/nameserver.cxx @@ -599,17 +599,83 @@ int CNameServer::ProcessNew(CProcess* process ) msgnew->unhooked = process->IsUnhooked(); msgnew->event_messages = process->IsEventMessages(); msgnew->system_messages = process->IsSystemMessages(); - msgnew->pathStrId = process->pathStrId(); - msgnew->ldpathStrId = process->ldPathStrId(); - msgnew->programStrId = process->programStrId(); +// msgnew->pathStrId = process->pathStrId(); +// msgnew->ldpathStrId = process->ldPathStrId(); +// msgnew->programStrId = process->programStrId(); + strcpy( msgnew->path, process->path() ); + strcpy( msgnew->ldpath, process->ldpath() ); + strcpy( msgnew->program, process->program() ); strcpy( msgnew->process_name, process->GetName() ); strcpy( msgnew->port_name, process->GetPort() ); msgnew->argc = process->argc(); - memcpy(msgnew->argv, process->userArgv(), process->userArgvLen()); + process->getUserArgs(msgnew->argv); strcpy( msgnew->infile, process->infile() ); strcpy( msgnew->outfile, process->outfile() ); msgnew->creation_time = process->GetCreationTime(); + if ( trace_settings & ( TRACE_NS | TRACE_REQUEST) ) + { + trace_printf( "%s@%d - Received monitor request new-process data.\n" + " msg.new_process_ns.nid=%d\n" + " msg.new_process_ns.pid=%d\n" + " msg.new_process_ns.verifier=%d\n" + " msg.new_process_ns.process_name=%s\n" + " msg.new_process_ns.type=%d\n" + " msg.new_process_ns.parent_nid=%d\n" + " msg.new_process_ns.parent_pid=%d\n" + " msg.new_process_ns.parent_verifier=%d\n" + " msg.new_process_ns.pair_parent_nid=%d\n" + " msg.new_process_ns.pair_parent_pid=%d\n" + " msg.new_process_ns.pair_parent_verifier=%d\n" + " msg.new_process_ns.priority=%d\n" + " msg.new_process_ns.backup=%d\n" + " msg.new_process_ns.unhooked=%d\n" + " msg.new_process_ns.event_messages=%d\n" + " msg.new_process_ns.system_messages=%d\n" + " msg.new_process_ns.path=%s\n" + " msg.new_process_ns.ldpath=%s\n" + " msg.new_process_ns.program=%s\n" + " msg.new_process_ns.port=%s\n" + " msg.new_process_ns.infile=%s\n" + " msg.new_process_ns.outfile=%s\n" + " msg.new_process_ns.creation_time=%ld(secs):%ld(nsecs)\n" + , method_name, __LINE__ + , msgnew->nid + , msgnew->pid + , msgnew->verifier + , msgnew->process_name + , msgnew->type + , msgnew->parent_nid + , msgnew->parent_pid + , msgnew->parent_verifier + , msgnew->pair_parent_nid + , msgnew->pair_parent_pid + , msgnew->pair_parent_verifier + , msgnew->priority + , msgnew->backup + , msgnew->unhooked + , msgnew->event_messages + , msgnew->system_messages + , msgnew->path + , msgnew->ldpath + , msgnew->program + , msgnew->port_name + , msgnew->infile + , msgnew->outfile + , msgnew->creation_time.tv_sec + , msgnew->creation_time.tv_nsec + ); + trace_printf("%s@%d - msg.new_process_ns.argc=%d\n" + , method_name, __LINE__ + , msgnew->argc ); + for (int i=0; i < msgnew->argc; i++) + { + trace_printf("%s@%d - msg.new_process_ns.argv[%d]=%s\n" + , method_name, __LINE__ + , i, msgnew->argv[i]); + } + } + int error = SendReceive(&msg ); TRACE_EXIT; @@ -778,10 +844,12 @@ int CNameServer::SendReceive( struct message_def* msg ) " process_info_ns.unhooked=%d\n" " process_info_ns.event_messages=%d\n" " process_info_ns.system_messages=%d\n" + " process_info_ns.path=%s\n" + " process_info_ns.ldpath=%s\n" " process_info_ns.program=%s\n" - " process_info_ns.pathStrId=%d:%d\n" - " process_info_ns.ldpathStrId=%d:%d\n" - " process_info_ns.programStrId=%d:%d\n" +// " process_info_ns.pathStrId=%d:%d\n" +// " process_info_ns.ldpathStrId=%d:%d\n" +// " process_info_ns.programStrId=%d:%d\n" " process_info_ns.port_name=%s\n" " process_info_ns.argc=%d\n" // " process_info_ns.argv=[%.*s]\n" @@ -806,13 +874,15 @@ int CNameServer::SendReceive( struct message_def* msg ) , msg->u.reply.u.process_info_ns.unhooked , msg->u.reply.u.process_info_ns.event_messages , msg->u.reply.u.process_info_ns.system_messages + , msg->u.reply.u.process_info_ns.path + , msg->u.reply.u.process_info_ns.ldpath , msg->u.reply.u.process_info_ns.program - , msg->u.reply.u.process_info_ns.pathStrId.nid - , msg->u.reply.u.process_info_ns.pathStrId.id - , msg->u.reply.u.process_info_ns.ldpathStrId.nid - , msg->u.reply.u.process_info_ns.ldpathStrId.id - , msg->u.reply.u.process_info_ns.programStrId.nid - , msg->u.reply.u.process_info_ns.programStrId.id +// , msg->u.reply.u.process_info_ns.pathStrId.nid +// , msg->u.reply.u.process_info_ns.pathStrId.id +// , msg->u.reply.u.process_info_ns.ldpathStrId.nid +// , msg->u.reply.u.process_info_ns.ldpathStrId.id +// , msg->u.reply.u.process_info_ns.programStrId.nid +// , msg->u.reply.u.process_info_ns.programStrId.id , msg->u.reply.u.process_info_ns.port_name , msg->u.reply.u.process_info_ns.argc // , &msg->u.reply.u.process_info_ns.argv http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/nscommacceptmon.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nscommacceptmon.cxx b/core/sqf/monitor/linux/nscommacceptmon.cxx index 6282b0a..edddca7 100644 --- a/core/sqf/monitor/linux/nscommacceptmon.cxx +++ b/core/sqf/monitor/linux/nscommacceptmon.cxx @@ -292,74 +292,64 @@ void CCommAcceptMon::monReqNewProcess( struct message_def* msg, int sockFd ) if ( trace_settings & ( TRACE_NS | TRACE_REQUEST) ) { trace_printf( "%s@%d - Received monitor request new-process data.\n" - " msg.new_process_ns.parent_nid=%d\n" - " msg.new_process_ns.parent_pid=%d\n" - " msg.new_process_ns.parent_verifier=%d\n" - " msg.new_process_ns.pair_parent_nid=%d\n" - " msg.new_process_ns.pair_parent_pid=%d\n" - " msg.new_process_ns.pair_parent_verifier=%d\n" " msg.new_process_ns.nid=%d\n" " msg.new_process_ns.pid=%d\n" " msg.new_process_ns.verifier=%d\n" - " msg.new_process_ns.backup=%d\n" - " msg.new_process_ns.event_messages=%d\n" - " msg.new_process_ns.system_messages=%d\n" + " msg.new_process_ns.process_name=%s\n" " msg.new_process_ns.type=%d\n" " msg.new_process_ns.parent_nid=%d\n" " msg.new_process_ns.parent_pid=%d\n" " msg.new_process_ns.parent_verifier=%d\n" + " msg.new_process_ns.pair_parent_nid=%d\n" + " msg.new_process_ns.pair_parent_pid=%d\n" + " msg.new_process_ns.pair_parent_verifier=%d\n" " msg.new_process_ns.priority=%d\n" " msg.new_process_ns.backup=%d\n" " msg.new_process_ns.unhooked=%d\n" " msg.new_process_ns.event_messages=%d\n" " msg.new_process_ns.system_messages=%d\n" - " msg.new_process_ns.pathStrId=%d:%d\n" - " msg.new_process_ns.ldpathStrId=%d:%d\n" - " msg.new_process_ns.programStrId=%d:%d\n" - " msg.new_process_ns.process_name=%s\n" + " msg.new_process_ns.path=%s\n" + " msg.new_process_ns.ldpath=%s\n" + " msg.new_process_ns.program=%s\n" " msg.new_process_ns.port=%s\n" - " msg.new_process_ns.argc=%d\n" - //" msg.new_process_ns.argv=%s\n" " msg.new_process_ns.infile=%s\n" " msg.new_process_ns.outfile=%s\n" " msg.new_process_ns.creation_time=%ld(secs):%ld(nsecs)\n" , method_name, __LINE__ - , msg->u.request.u.new_process_ns.parent_nid - , msg->u.request.u.new_process_ns.parent_pid - , msg->u.request.u.new_process_ns.parent_verifier - , msg->u.request.u.new_process_ns.pair_parent_nid - , msg->u.request.u.new_process_ns.pair_parent_pid - , msg->u.request.u.new_process_ns.pair_parent_verifier , msg->u.request.u.new_process_ns.nid , msg->u.request.u.new_process_ns.pid , msg->u.request.u.new_process_ns.verifier - , msg->u.request.u.new_process_ns.backup - , msg->u.request.u.new_process_ns.event_messages - , msg->u.request.u.new_process_ns.system_messages + , msg->u.request.u.new_process_ns.process_name , msg->u.request.u.new_process_ns.type , msg->u.request.u.new_process_ns.parent_nid , msg->u.request.u.new_process_ns.parent_pid , msg->u.request.u.new_process_ns.parent_verifier + , msg->u.request.u.new_process_ns.pair_parent_nid + , msg->u.request.u.new_process_ns.pair_parent_pid + , msg->u.request.u.new_process_ns.pair_parent_verifier , msg->u.request.u.new_process_ns.priority , msg->u.request.u.new_process_ns.backup , msg->u.request.u.new_process_ns.unhooked , msg->u.request.u.new_process_ns.event_messages , msg->u.request.u.new_process_ns.system_messages - , msg->u.request.u.new_process_ns.pathStrId.nid - , msg->u.request.u.new_process_ns.pathStrId.id - , msg->u.request.u.new_process_ns.ldpathStrId.nid - , msg->u.request.u.new_process_ns.ldpathStrId.id - , msg->u.request.u.new_process_ns.programStrId.nid - , msg->u.request.u.new_process_ns.programStrId.id - , msg->u.request.u.new_process_ns.process_name + , msg->u.request.u.new_process_ns.path + , msg->u.request.u.new_process_ns.ldpath + , msg->u.request.u.new_process_ns.program , msg->u.request.u.new_process_ns.port_name - , msg->u.request.u.new_process_ns.argc - //, msg->u.request.u.new_process_ns.argv , msg->u.request.u.new_process_ns.infile , msg->u.request.u.new_process_ns.outfile , msg->u.request.u.new_process_ns.creation_time.tv_sec , msg->u.request.u.new_process_ns.creation_time.tv_nsec ); + trace_printf("%s@%d - msg.new_process_ns.argc=%d\n" + , method_name, __LINE__ + , msg->u.request.u.new_process_ns.argc ); + for (int i=0; i < msg->u.request.u.new_process_ns.argc; i++) + { + trace_printf("%s@%d - msg.new_process_ns.argv[%d]=%s\n" + , method_name, __LINE__ + , i, msg->u.request.u.new_process_ns.argv[i]); + } } CExternalReq::reqQueueMsg_t msgType; http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/nsprocess.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nsprocess.cxx b/core/sqf/monitor/linux/nsprocess.cxx index dd2ab68..1faa5b5 100644 --- a/core/sqf/monitor/linux/nsprocess.cxx +++ b/core/sqf/monitor/linux/nsprocess.cxx @@ -39,9 +39,12 @@ CProcess *CProcessContainer::CreateProcess (CProcess * parent, int backup, bool unhooked, char *process_name, - strId_t pathStrId, - strId_t ldpathStrId, - strId_t programStrId, + char *path, + char *ldpath, + char *program, +// strId_t pathStrId, +// strId_t ldpathStrId, +// strId_t programStrId, char *infile, char *outfile, void *tag, @@ -62,8 +65,24 @@ CProcess *CProcessContainer::CreateProcess (CProcess * parent, } process = - new CProcess (parent, nid, pid, verifier, type, priority, backup, debug, unhooked, process_name, - pathStrId, ldpathStrId, programStrId, infile, outfile); + new CProcess( parent + , nid + , pid + , verifier + , type + , priority + , backup + , debug + , unhooked + , process_name + , path + , ldpath + , program +// , pathStrId +// , ldpathStrId +// , programStrId + , infile + , outfile); if (process) { AddToList( process ); http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/nsreqnewproc.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nsreqnewproc.cxx b/core/sqf/monitor/linux/nsreqnewproc.cxx index eb06728..2a16707 100644 --- a/core/sqf/monitor/linux/nsreqnewproc.cxx +++ b/core/sqf/monitor/linux/nsreqnewproc.cxx @@ -121,9 +121,12 @@ void CExtNewProcNsReq::performRequest() msg_->u.request.u.new_process_ns.backup, msg_->u.request.u.new_process_ns.unhooked, msg_->u.request.u.new_process_ns.process_name, - msg_->u.request.u.new_process_ns.pathStrId, - msg_->u.request.u.new_process_ns.ldpathStrId, - msg_->u.request.u.new_process_ns.programStrId, + msg_->u.request.u.new_process_ns.path, + msg_->u.request.u.new_process_ns.ldpath, + msg_->u.request.u.new_process_ns.program, +// msg_->u.request.u.new_process_ns.pathStrId, +// msg_->u.request.u.new_process_ns.ldpathStrId, +// msg_->u.request.u.new_process_ns.programStrId, msg_->u.request.u.new_process_ns.infile, msg_->u.request.u.new_process_ns.outfile, 0, // tag http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/nsreqprocinfons.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/nsreqprocinfons.cxx b/core/sqf/monitor/linux/nsreqprocinfons.cxx index dbbf80c..562fdb6 100644 --- a/core/sqf/monitor/linux/nsreqprocinfons.cxx +++ b/core/sqf/monitor/linux/nsreqprocinfons.cxx @@ -92,10 +92,12 @@ void CExtProcInfoNsReq::copyInfo(CProcess *process, ProcessInfoNs_reply_def &pro process_info_ns.unhooked = process->IsUnhooked(); process_info_ns.event_messages = process->IsEventMessages(); process_info_ns.system_messages = process->IsSystemMessages(); + strncpy( process_info_ns.path, process->path(), MAX_PROCESS_PATH ); + strncpy( process_info_ns.ldpath, process->ldpath(), MAX_PROCESS_PATH ); strncpy( process_info_ns.program, process->program(), MAX_PROCESS_PATH ); - process_info_ns.pathStrId = process->pathStrId(); - process_info_ns.ldpathStrId = process->ldPathStrId(); - process_info_ns.programStrId = process->programStrId(); +// process_info_ns.pathStrId = process->pathStrId(); +// process_info_ns.ldpathStrId = process->ldPathStrId(); +// process_info_ns.programStrId = process->programStrId(); strncpy( process_info_ns.port_name, process->GetPort(), MPI_MAX_PORT_NAME ); process_info_ns.argc = process->argc(); memcpy( process_info_ns.argv, process->userArgv(), process->userArgvLen() ); @@ -122,10 +124,12 @@ void CExtProcInfoNsReq::copyInfo(CProcess *process, ProcessInfoNs_reply_def &pro " process_info_ns.unhooked=%d\n" " process_info_ns.event_messages=%d\n" " process_info_ns.system_messages=%d\n" + " process_info_ns.path=%s\n" + " process_info_ns.ldpath=%s\n" " process_info_ns.program=%s\n" - " process_info_ns.pathStrId=%d:%d\n" - " process_info_ns.ldpathStrId=%d:%d\n" - " process_info_ns.programStrId=%d:%d\n" +// " process_info_ns.pathStrId=%d:%d\n" +// " process_info_ns.ldpathStrId=%d:%d\n" +// " process_info_ns.programStrId=%d:%d\n" " process_info_ns.port_name=%s\n" " process_info_ns.argc=%d\n" " process_info_ns.infile=%s\n" @@ -145,13 +149,15 @@ void CExtProcInfoNsReq::copyInfo(CProcess *process, ProcessInfoNs_reply_def &pro , process_info_ns.unhooked , process_info_ns.event_messages , process_info_ns.system_messages + , process_info_ns.path + , process_info_ns.ldpath , process_info_ns.program - , process_info_ns.pathStrId.nid - , process_info_ns.pathStrId.id - , process_info_ns.ldpathStrId.nid - , process_info_ns.ldpathStrId.id - , process_info_ns.programStrId.nid - , process_info_ns.programStrId.id +// , process_info_ns.pathStrId.nid +// , process_info_ns.pathStrId.id +// , process_info_ns.ldpathStrId.nid +// , process_info_ns.ldpathStrId.id +// , process_info_ns.programStrId.nid +// , process_info_ns.programStrId.id , process_info_ns.port_name , process_info_ns.argc , process_info_ns.infile http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/pnode.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx index 563cf01..0884343 100644 --- a/core/sqf/monitor/linux/pnode.cxx +++ b/core/sqf/monitor/linux/pnode.cxx @@ -543,7 +543,7 @@ void CNode::CheckActivationPhase( void ) } tmReady = (tmCount == GetLNodesCount()) ? true : false; } - + if ( tmReady ) { if (trace_settings & (TRACE_INIT | TRACE_SYNC | TRACE_TMSYNC)) @@ -1712,6 +1712,7 @@ void CNodeContainer::AddedNode( CNode *node ) TRACE_EXIT; } +#ifndef NAMESERVER_PROCESS CProcess *CNodeContainer::AddCloneProcess( ProcessInfoNs_reply_def *processInfo ) { const char method_name[] = "CNodeContainer::AddNode"; @@ -1719,6 +1720,11 @@ CProcess *CNodeContainer::AddCloneProcess( ProcessInfoNs_reply_def *processInfo CLNode *lnode = Nodes->GetLNode(processInfo->nid); CNode *node = lnode->GetNode(); + + strId_t pathStrId = MyNode->GetStringId ( processInfo->path, lnode ); + strId_t ldpathStrId = MyNode->GetStringId (processInfo->ldpath, lnode ); + strId_t programStrId = MyNode->GetStringId ( processInfo->program, lnode ); + CProcess *process = node->CloneProcess( processInfo->nid , processInfo->type , processInfo->priority @@ -1733,9 +1739,12 @@ CProcess *CNodeContainer::AddCloneProcess( ProcessInfoNs_reply_def *processInfo , processInfo->parent_verifier , processInfo->event_messages , processInfo->system_messages - , processInfo->pathStrId - , processInfo->ldpathStrId - , processInfo->programStrId + , pathStrId + , ldpathStrId + , programStrId +// , processInfo->pathStrId +// , processInfo->ldpathStrId +// , processInfo->programStrId , processInfo->infile , processInfo->outfile , &processInfo->creation_time @@ -1744,6 +1753,7 @@ CProcess *CNodeContainer::AddCloneProcess( ProcessInfoNs_reply_def *processInfo TRACE_EXIT; return(process); } +#endif CNode *CNodeContainer::AddNode( int pnid ) { @@ -3489,6 +3499,8 @@ CNodeContainer::InitSyncBuffer( struct sync_buffer_def *syncBuf syncBuf->nodeInfo.nodeMask = upNodes; #ifdef NAMESERVER_PROCESS syncBuf->nodeInfo.monConnCount = MyNode->GetMonConnCount(); +#else + syncBuf->nodeInfo.monProcCount = MyNode->GetNumProcs(); #endif for (int i = 0; i < GetPNodesCount(); i++) @@ -3515,7 +3527,7 @@ CNodeContainer::InitSyncBuffer( struct sync_buffer_def *syncBuf , syncBuf->nodeInfo.seq_num , syncBuf->nodeInfo.monConnCount); #else - trace_printf( "%s@%d - Node %s (pnid=%d) node_state=(%d)(%s), internalState=%d, TmSyncState=(%d)(%s), change_nid=%d, seqNum_=%lld\n" + trace_printf( "%s@%d - Node %s (pnid=%d) node_state=(%d)(%s), internalState=%d, TmSyncState=(%d)(%s), change_nid=%d, seqNum_=%lld, monProcCount=%d\n" , method_name, __LINE__ , MyNode->GetName() , MyPNID @@ -3525,7 +3537,8 @@ CNodeContainer::InitSyncBuffer( struct sync_buffer_def *syncBuf , syncBuf->nodeInfo.tmSyncState , SyncStateString( syncBuf->nodeInfo.tmSyncState ) , syncBuf->nodeInfo.change_nid - , syncBuf->nodeInfo.seq_num); + , syncBuf->nodeInfo.seq_num + , syncBuf->nodeInfo.monProcCount); #endif } http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/pnode.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/pnode.h b/core/sqf/monitor/linux/pnode.h index 44008ea..dafee70 100644 --- a/core/sqf/monitor/linux/pnode.h +++ b/core/sqf/monitor/linux/pnode.h @@ -66,7 +66,9 @@ public: ~CNodeContainer( void ); void AddedNode( CNode *node ); +#ifndef NAMESERVER_PROCESS CProcess *AddCloneProcess( ProcessInfoNs_reply_def *processInfo ); +#endif CNode *AddNode( int pnid ); void AddNodes( void ); void AddToSpareNodesList( int pnid ); http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/process.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/process.cxx b/core/sqf/monitor/linux/process.cxx index 9b2be4f..5e7b792 100644 --- a/core/sqf/monitor/linux/process.cxx +++ b/core/sqf/monitor/linux/process.cxx @@ -130,8 +130,15 @@ CProcess::CProcess (CProcess * parent, int nid, int pid, #endif PROCESSTYPE type, int priority, int backup, bool debug, bool unhooked, - char *name, strId_t pathStrId, strId_t ldpathStrId, - strId_t programStrId, char *infile, char *outfile) + char *name, +#ifdef NAMESERVER_PROCESS + char *path, + char *ldpath, + char *program, +#else + strId_t pathStrId, strId_t ldpathStrId, strId_t programStrId, +#endif + char *infile, char *outfile) : Nid (nid), Pid (pid), @@ -182,10 +189,18 @@ CProcess::CProcess (CProcess * parent, int nid, int pid, , argc_(0) , userArgvLen_ (0) , userArgv_ (NULL) - , programStrId_ (programStrId) +#ifdef NAMESERVER_PROCESS + , path_(path) + , ldpath_(ldpath) + , program_(program) +#else + , path_() + , ldpath_() , program_() + , programStrId_(programStrId) , pathStrId_(pathStrId) , ldpathStrId_(ldpathStrId) +#endif , firstInstance_(true) , cmpOrEsp_(false) , trafConf_() @@ -227,7 +242,9 @@ CProcess::CProcess (CProcess * parent, int nid, int pid, if ( outfile && strcmp(outfile,"#default") != 0) outfile_ = outfile; +#ifndef NAMESERVER_PROCESS Config->strIdToString(programStrId_, program_ ); +#endif switch (Type) { @@ -403,6 +420,36 @@ CProcess::~CProcess (void) TRACE_EXIT; } +#ifndef NAMESERVER_PROCESS +const char* CProcess::path() +{ + Config->strIdToString(pathStrId_, path_ ); + return( path_.c_str() ); +} +#endif + +#ifndef NAMESERVER_PROCESS +const char* CProcess::ldpath() +{ + Config->strIdToString(ldpathStrId_, ldpath_ ); + return( ldpath_.c_str() ); +} +#endif + +int CProcess::getUserArgs( char user_argv[MAX_ARGS][MAX_ARG_SIZE] ) +{ + const char *pUserArgv = userArgv_; + int i, arglen; + for (i = 0; i < argc_; i++) + { + arglen = strlen (pUserArgv) + 1; + strcpy( user_argv[i], pUserArgv ); + pUserArgv += arglen; + } + strcpy( user_argv[i], "" ); + return(argc_); +} + void CProcess::userArgs ( int argc, int argvLen, const char * argvList ) { const char method_name[] = "CProcess::userArgs"; @@ -4728,9 +4775,15 @@ CProcess *CProcessContainer::CloneProcess (int nid, int parent_verifier, bool event_messages, bool system_messages, +#ifdef NAMESERVER_PROCESS + char *path, + char *ldpath, + char *program, +#else strId_t pathStrId, strId_t ldpathStrId, strId_t programStrId, +#endif char *infile, char *outfile, struct timespec *creation_time, @@ -4800,8 +4853,21 @@ CProcess *CProcessContainer::CloneProcess (int nid, #ifdef NAMESERVER_PROCESS process = - new CProcess (parent, nid, os_pid, verifier, type, priority, backup, false, unhooked, pname, pathStrId, ldpathStrId, - programStrId, infile, outfile); + new CProcess( parent + , nid + , os_pid + , verifier + , type + , priority + , backup + , false + , unhooked + , pname + , path + , ldpath + , program + , infile + , outfile); #else process = new CProcess (parent, nid, os_pid, type, priority, backup, false, unhooked, pname, pathStrId, ldpathStrId, http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/process.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/process.h b/core/sqf/monitor/linux/process.h index 227836f..3c813bb 100644 --- a/core/sqf/monitor/linux/process.h +++ b/core/sqf/monitor/linux/process.h @@ -89,9 +89,15 @@ class CProcessContainer int parent_verifier, bool event_messages, bool system_messages, +#ifdef NAMESERVER_PROCESS + char *path, + char *ldpath, + char *program, +#else strId_t pathStrId, strId_t ldpathStrId, strId_t programStrId, +#endif char *infile, char *outfile, struct timespec *creation_time, @@ -118,9 +124,15 @@ class CProcessContainer int backup, bool unhooked, char *process_name, +#ifdef NAMESERVER_PROCESS + char *path, + char *ldpath, + char *program, +#else strId_t pathStrId, strId_t ldpathStrId, strId_t programStrId, +#endif char *infile, char *outfile , void *tag @@ -222,9 +234,15 @@ class CProcess bool debug, bool unhooked, char *name, +#ifdef NAMESERVER_PROCESS + char *path, + char *ldpath, + char *program, +#else strId_t pathStrId, strId_t ldpathStrId, strId_t programStrId, +#endif char *infile, char *outfile); ~CProcess( void ); @@ -367,15 +385,25 @@ class CProcess void userArgs ( int argc, int argvLen, const char * argvList ); void userArgs ( int argc, char user_argv[MAX_ARGS][MAX_ARG_SIZE] ); + int getUserArgs( char user_argv[MAX_ARGS][MAX_ARG_SIZE] ); - strId_t programStrId() { return programStrId_; } - const char * program() { return program_.c_str(); }; +#ifdef NAMESERVER_PROCESS + const char* path() { return path_.c_str(); }; + const char* ldpath() { return ldpath_.c_str(); }; +#else + const char* path(); + const char* ldpath(); +#endif + const char* program() { return program_.c_str(); }; bool isCmpOrEsp() { return cmpOrEsp_; } const char *infile() { return infile_.c_str(); }; const char *outfile() { return outfile_.c_str(); }; +#ifndef NAMESERVER_PROCESS strId_t pathStrId() { return pathStrId_; }; strId_t ldPathStrId() { return ldpathStrId_; }; + strId_t programStrId() { return programStrId_; } +#endif const char *fifo_stdin() { return fifo_stdin_.c_str(); }; const char *fifo_stdout() { return fifo_stdout_.c_str(); }; @@ -494,8 +522,10 @@ private: int userArgvLen_; char *userArgv_; + string path_; // process's object lookup path to program + string ldpath_; // process's library load path for program + string program_; // program file name strId_t programStrId_; - string program_; // object file name strId_t pathStrId_; strId_t ldpathStrId_; bool firstInstance_; // reset on persistent process re-creation http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/replicate.cxx ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/replicate.cxx b/core/sqf/monitor/linux/replicate.cxx index f9ebf53..15e0394 100644 --- a/core/sqf/monitor/linux/replicate.cxx +++ b/core/sqf/monitor/linux/replicate.cxx @@ -588,6 +588,9 @@ bool CReplProcess::replicate(struct internal_msg_def *&msg) msg->type = InternalType_Process; msg->u.process.nid = process_->GetNid(); msg->u.process.pid = process_->GetPid(); +#ifdef NAMESERVER_PROCESS + msg->u.process.verifier = process_->GetVerifier(); +#endif msg->u.process.type = process_->GetType(); msg->u.process.priority = process_->GetPriority(); msg->u.process.backup = process_->IsBackup(); @@ -599,16 +602,18 @@ bool CReplProcess::replicate(struct internal_msg_def *&msg) msg->u.process.pair_parent_nid = process_->GetPairParentNid(); msg->u.process.pair_parent_pid = process_->GetPairParentPid(); msg->u.process.pair_parent_verifier = process_->GetPairParentVerifier(); +#ifndef NAMESERVER_PROCESS msg->u.process.pathStrId = process_->pathStrId(); msg->u.process.ldpathStrId = process_->ldPathStrId(); msg->u.process.programStrId = process_->programStrId(); +#endif msg->u.process.argc = process_->argc(); char * stringData = & msg->u.process.stringData; // Copy the process name msg->u.process.nameLen = nameLen_; - memcpy(stringData, process_->GetName(), nameLen_ ); + memcpy(stringData, process_->GetName(), nameLen_ ); stringData += nameLen_; // Copy the standard in file name @@ -618,9 +623,25 @@ bool CReplProcess::replicate(struct internal_msg_def *&msg) // Copy the standard out file name msg->u.process.outfileLen = outfileLen_; - memcpy(stringData, process_->outfile(), outfileLen_ ); + memcpy(stringData, process_->outfile(), outfileLen_ ); stringData += outfileLen_; +#ifdef NAMESERVER_PROCESS + // Copy the path + msg->u.process.pathLen = pathLen_; + memcpy(stringData, process_->path(), pathLen_ ); + stringData += pathLen_; + + // Copy the ldpath + msg->u.process.ldpathLen = ldpathLen_; + memcpy(stringData, process_->ldpath(), ldpathLen_ ); + stringData += ldpathLen_; + + // Copy the program + msg->u.process.programLen = programLen_; + memcpy(stringData, process_->program(), programLen_ ); + stringData += programLen_; +#endif // Copy the program argument strings msg->u.process.argvLen = argvLen_; memcpy(stringData, process_->userArgv(), argvLen_); @@ -628,8 +649,42 @@ bool CReplProcess::replicate(struct internal_msg_def *&msg) // temp trace if (trace_settings & TRACE_PROCESS) { - trace_printf("%s@%d - replSize_=%d, programStrId=(%d,%d), pathStrId=(%d,%d), ldPathStrId=(%d,%d), name=%s, strlen(name)=%d, infile=%s, strlen(infile)=%d, outfile=%s, strlen(outfile)=%d, argc=%d, strlen(total argv)=%d, args=[%.*s]\n", - method_name, __LINE__, replSize_, msg->u.process.programStrId.nid, msg->u.process.programStrId.id, msg->u.process.pathStrId.nid, msg->u.process.pathStrId.id, msg->u.process.ldpathStrId.nid, msg->u.process.ldpathStrId.id, &msg->u.process.stringData, nameLen_, &msg->u.process.stringData+nameLen_, infileLen_, &msg->u.process.stringData+nameLen_+infileLen_, outfileLen_, msg->u.process.argc, argvLen_, argvLen_, &msg->u.process.stringData+nameLen_+infileLen_+outfileLen_); + trace_printf( "%s@%d - replSize_=%d\n" + " msg->u.process.name=%s, strlen(name)=%d\n" + " msg->u.process.infile=%s, strlen(infile)=%d\n" + " msg->u.process.outfile=%s, strlen(outfile)=%d\n" +#ifdef NAMESERVER_PROCESS + " msg->u.process.path=%s, strlen(path)=%d\n" + " msg->u.process.ldpath=%s, strlen(ldpath)=%d\n" + " msg->u.process.program=%s, strlen(program)=%d\n" +#else + " msg->u.process.programStrId=(%d,%d)\n" + " msg->u.process.pathStrId=(%d,%d)\n" + " msg->u.process.ldPathStrId=(%d,%d)\n" +#endif + " msg->u.process.argc=%d, strlen(total argv)=%d, args=[%.*s]\n" + , method_name, __LINE__, replSize_ + , &msg->u.process.stringData, nameLen_ + , &msg->u.process.stringData+nameLen_, infileLen_ + , &msg->u.process.stringData+nameLen_+infileLen_, outfileLen_ +#ifdef NAMESERVER_PROCESS + , &msg->u.process.stringData+nameLen_+infileLen_+outfileLen_, pathLen_ + , &msg->u.process.stringData+nameLen_+infileLen_+outfileLen_+pathLen_, ldpathLen_ + , &msg->u.process.stringData+nameLen_+infileLen_+outfileLen_+pathLen_+ldpathLen_, programLen_ +#else + , msg->u.process.programStrId.nid + , msg->u.process.programStrId.id + , msg->u.process.pathStrId.nid + , msg->u.process.pathStrId.id + , msg->u.process.ldpathStrId.nid + , msg->u.process.ldpathStrId.id +#endif + , msg->u.process.argc +#ifdef NAMESERVER_PROCESS + , argvLen_, argvLen_, &msg->u.clone.stringData+nameLen_+infileLen_+outfileLen_+pathLen_+ldpathLen_+programLen_); +#else + , argvLen_, argvLen_, &msg->u.clone.stringData+nameLen_+infileLen_+outfileLen_); +#endif } // Advance sync buffer pointer @@ -646,8 +701,6 @@ bool CReplProcess::replicate(struct internal_msg_def *&msg) return true; } - - CReplProcInit::CReplProcInit( CProcess *process , void *tag , int result @@ -737,11 +790,23 @@ CReplClone::CReplClone(CProcess *process) : process_(process) infileLen_ = strlen(process->infile()) + 1; outfileLen_ = strlen(process->outfile()) + 1; argvLen_ = process->userArgvLen(); +#ifdef NAMESERVER_PROCESS + pathLen_ = strlen(process_->path()) + 1; + ldpathLen_ = strlen(process_->ldpath()) + 1; + programLen_ = strlen(process_->program()) + 1; +#endif // Compute message size (adjust if needed to conform to // internal_msg_def structure alignment). +#ifdef NAMESERVER_PROCESS + replSize_ = (MSG_HDR_SIZE + sizeof( clone_def ) + nameLen_ + portLen_ + + infileLen_ + outfileLen_ + argvLen_ + + pathLen_ + ldpathLen_ + programLen_ + msgAlignment_ + ) & ~msgAlignment_; +#else replSize_ = (MSG_HDR_SIZE + sizeof( clone_def ) + nameLen_ + portLen_ + infileLen_ + outfileLen_ + argvLen_ + msgAlignment_ ) & ~msgAlignment_; +#endif if (trace_settings & (TRACE_SYNC_DETAIL | TRACE_PROCESS_DETAIL)) { @@ -792,9 +857,11 @@ bool CReplClone::replicate(struct internal_msg_def *&msg) msg->u.clone.priority = process_->GetPriority(); msg->u.clone.backup = process_->IsBackup(); msg->u.clone.unhooked = process_->IsUnhooked(); +#ifndef NAMESERVER_PROCESS msg->u.clone.pathStrId = process_->pathStrId(); msg->u.clone.ldpathStrId = process_->ldPathStrId(); msg->u.clone.programStrId = process_->programStrId(); +#endif msg->u.clone.os_pid = process_->GetPid(); msg->u.clone.verifier = process_->GetVerifier(); msg->u.clone.prior_pid = process_->GetPriorPid (); @@ -834,37 +901,78 @@ bool CReplClone::replicate(struct internal_msg_def *&msg) memcpy(stringData, process_->outfile(), outfileLen_ ); stringData += outfileLen_; +#ifdef NAMESERVER_PROCESS + // Copy the path + msg->u.clone.pathLen = pathLen_; + memcpy(stringData, process_->path(), pathLen_ ); + stringData += pathLen_; + + // Copy the ldpath + msg->u.clone.ldpathLen = ldpathLen_; + memcpy(stringData, process_->ldpath(), ldpathLen_ ); + stringData += ldpathLen_; + + // Copy the program + msg->u.clone.programLen = programLen_; + memcpy(stringData, process_->program(), programLen_ ); + stringData += programLen_; +#endif + // Copy the program argument strings msg->u.clone.argvLen = argvLen_; memcpy(stringData, process_->userArgv(), argvLen_); // temp trace +#ifndef NAMESERVER_PROCESS if (trace_settings & TRACE_PROCESS) { trace_printf( "%s@%d - replSize_=%d\n" - " msg->u.clone.programStrId=(%d,%d)\n" - " msg->u.clone.pathStrId=(%d,%d)\n" - " msg->u.clone.ldPathStrId=(%d,%d)\n" " msg->u.clone.name=%s, strlen(name)=%d\n" " msg->u.clone.port=%s, strlen(port)=%d\n" " msg->u.clone.infile=%s, strlen(infile)=%d\n" " msg->u.clone.outfile=%s, strlen(outfile)=%d\n" + " msg->u.clone.programStrId=(%d,%d)\n" + " msg->u.clone.pathStrId=(%d,%d)\n" + " msg->u.clone.ldPathStrId=(%d,%d)\n" " msg->u.clone.argc=%d, strlen(total argv)=%d, args=[%.*s]\n" , method_name, __LINE__, replSize_ + , &msg->u.clone.stringData, nameLen_ + , &msg->u.clone.stringData+nameLen_, portLen_ + , &msg->u.clone.stringData+nameLen_+portLen_, infileLen_ + , &msg->u.clone.stringData+nameLen_+portLen_+infileLen_, outfileLen_ , msg->u.clone.programStrId.nid , msg->u.clone.programStrId.id , msg->u.clone.pathStrId.nid , msg->u.clone.pathStrId.id , msg->u.clone.ldpathStrId.nid , msg->u.clone.ldpathStrId.id + , msg->u.clone.argc + , argvLen_, argvLen_, &msg->u.clone.stringData+nameLen_+portLen_+infileLen_+outfileLen_); + } +#else + if (trace_settings & TRACE_PROCESS) + { + trace_printf( "%s@%d - replSize_=%d\n" + " msg->u.clone.name=%s, strlen(name)=%d\n" + " msg->u.clone.port=%s, strlen(port)=%d\n" + " msg->u.clone.infile=%s, strlen(infile)=%d\n" + " msg->u.clone.outfile=%s, strlen(outfile)=%d\n" + " msg->u.clone.path=%s, strlen(path)=%d\n" + " msg->u.clone.ldpath=%s, strlen(ldpath)=%d\n" + " msg->u.clone.program=%s, strlen(program)=%d\n" + " msg->u.clone.argc=%d, strlen(total argv)=%d, args=[%.*s]\n" + , method_name, __LINE__, replSize_ , &msg->u.clone.stringData, nameLen_ , &msg->u.clone.stringData+nameLen_, portLen_ , &msg->u.clone.stringData+nameLen_+portLen_, infileLen_ , &msg->u.clone.stringData+nameLen_+portLen_+infileLen_, outfileLen_ + , &msg->u.clone.stringData+nameLen_+portLen_+infileLen_+outfileLen_, pathLen_ + , &msg->u.clone.stringData+nameLen_+portLen_+infileLen_+outfileLen_+pathLen_, ldpathLen_ + , &msg->u.clone.stringData+nameLen_+portLen_+infileLen_+outfileLen_+pathLen_+ldpathLen_, programLen_ , msg->u.clone.argc - , argvLen_, argvLen_, &msg->u.clone.stringData+nameLen_+portLen_+infileLen_+outfileLen_); + , argvLen_, argvLen_, &msg->u.clone.stringData+nameLen_+portLen_+infileLen_+outfileLen_+pathLen_+ldpathLen_+programLen_); } - +#endif // Advance sync buffer pointer Nodes->AddMsg( msg, replSize() ); http://git-wip-us.apache.org/repos/asf/trafodion/blob/1f7eabb0/core/sqf/monitor/linux/replicate.h ---------------------------------------------------------------------- diff --git a/core/sqf/monitor/linux/replicate.h b/core/sqf/monitor/linux/replicate.h index 3ae0909..0526159 100644 --- a/core/sqf/monitor/linux/replicate.h +++ b/core/sqf/monitor/linux/replicate.h @@ -155,6 +155,11 @@ private: int nameLen_; int infileLen_; int outfileLen_; +#ifdef NAMESERVER_PROCESS + int pathLen_; + int ldpathLen_; + int programLen_; +#endif int argvLen_; }; @@ -192,6 +197,11 @@ private: int infileLen_; int outfileLen_; int argvLen_; +#ifdef NAMESERVER_PROCESS + int pathLen_; + int ldpathLen_; + int programLen_; +#endif }; class CReplExit: public CReplObj
