[ 
https://issues.apache.org/jira/browse/TRAFODION-2883?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16380731#comment-16380731
 ] 

ASF GitHub Bot commented on TRAFODION-2883:
-------------------------------------------

Github user DaveBirdsall commented on a diff in the pull request:

    https://github.com/apache/trafodion/pull/1457#discussion_r171324209
  
    --- Diff: core/sqf/monitor/linux/zclient.cxx ---
    @@ -799,6 +896,67 @@ bool CZClient::IsZNodeExpired( const char *nodeName, 
int &zerr )
         return( expired );
     }
     
    +int CZClient::CreateMasterZNode(  const char *nodeName )
    +{
    +    const char method_name[] = "CZClient::CreateMasterZNode";
    +    TRACE_ENTRY;
    +
    +    int rc;
    +    int retries = 0;
    +    
    +    stringstream masterpath;
    +    masterpath.str( "" );
    +    masterpath << zkRootNode_.c_str() 
    +            << zkRootNodeInstance_.c_str() 
    +            << ZCLIENT_MASTER_ZNODE<< "/"
    +            << nodeName;
    +            
    +    string monZnode = masterpath.str( );
    +
    +    stringstream ss;
    +    ss.str( "" );
    +    ss <<nodeName << ":" << MyPNID;
    +    string monData = ss.str( ); 
    +
    +    if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
    +    {
    +        trace_printf( "%s@%d RegisterZNode(%s:%s)\n"
    +                    , method_name, __LINE__
    +                    , monZnode.c_str()
    +                    , monData.c_str() );
    +    }
    +
    +    rc = RegisterZNode( monZnode.c_str(), monData.c_str(), ZOO_EPHEMERAL );
    +    while ( ((rc == ZCONNECTIONLOSS) || (rc == ZOPERATIONTIMEOUT)) && 
retries < ZOOKEEPER_RETRY_COUNT)
    +    {
    +        sleep(ZOOKEEPER_RETRY_WAIT);
    +        retries++;
    +        rc = RegisterZNode( monZnode.c_str(), monData.c_str(), 
ZOO_EPHEMERAL );
    +    }
    +    
    +    if (rc != ZOK)
    +    {
    +        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
    +        {
    +            trace_printf( "%s@%d Error (MasterMonitor) Create master node 
for %s with rc = %d)\n"
    +                    , method_name, __LINE__, monZnode.c_str( ), rc);
    +        }
    +        char buf[MON_STRING_BUF_SIZE];
    +        snprintf( buf, sizeof(buf)
    +                , "[%s], RegisterZNode(%s) failed with error %s\n"
    +                , method_name, monData.c_str(), zerror(rc) );
    +        mon_log_write(MON_ZCLIENT_CREATEMASTERZNODE, SQ_LOG_ERR, buf);
    +        return(rc); // Return the error
    --- End diff --
    
    Should there be a TRACE_EXIT before this return statement?


> Preliminary Trafodion Foundation Scalability Enhancements
> ---------------------------------------------------------
>
>                 Key: TRAFODION-2883
>                 URL: https://issues.apache.org/jira/browse/TRAFODION-2883
>             Project: Apache Trafodion
>          Issue Type: Improvement
>          Components: dtm, foundation, installer
>    Affects Versions: 2.3
>            Reporter: Gonzalo E Correa
>            Assignee: Gonzalo E Correa
>            Priority: Major
>             Fix For: 2.3
>
>
> Initial changes required for:
>   - AGENT mode monitor
>       o Preliminary change to remove the dependency on OpenMPI during
>         initialization of an operational cluster by creating a cluster
>         of one node (MASTER monitor) where other remote nodes (SLAVE
>         monitors) join the cluster through the MASTER
>   - MASTER monitor selection
>   - Scale bug fixes found when creating clusters greater than 120 nodes



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to