GitHub user narendragoyal commented on a diff in the pull request:

    https://github.com/apache/trafodion/pull/1457#discussion_r171387433
  
    --- Diff: core/sqf/monitor/linux/zclient.cxx ---
    @@ -488,6 +488,103 @@ int CZClient::ZooExistRetry(zhandle_t *zh, const char 
*path, int watch, struct S
         return rc;
     }
     
    +const char* CZClient::WaitForAndReturnMaster( bool doWait )
    +{
    +    const char method_name[] = "CZClient::WaitForAndReturnMaster";
    +    TRACE_ENTRY;
    +    
    +    bool found = false;
    +    int rc = -1;
    +    int retries = 0;
    +    Stat stat;
    +
    +    struct String_vector nodes = {0, NULL};
    +    stringstream ss;
    +    ss.str( "" );
    +    ss << zkRootNode_.c_str() 
    +       << zkRootNodeInstance_.c_str() 
    +       << ZCLIENT_MASTER_ZNODE;
    +    string masterMonitor( ss.str( ) );
    +
    +    // wait for 3 minutes for giving up.  
    +    while ( (!found) && (retries < 180)) 
    +    {
    +        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
    +        {
    +            trace_printf( "%s@%d trafCluster=%s\n"
    +                        , method_name, __LINE__, masterMonitor.c_str() );
    +        }
    +        // Verify the existence of the parent ZCLIENT_MASTER_ZNODE
    +        rc = ZooExistRetry( ZHandle, masterMonitor.c_str( ), 0, &stat );
    +        
    +        if ( rc == ZNONODE )
    +        {
    +            if (doWait == false)
    +            {
    +                break;
    +            } 
    +            continue;
    +        }
    +        else if ( rc == ZOK )
    +        {
    +            // Now get the list of available znodes in the cluster.
    +            //
    +            // This will return child znodes for each monitor process that 
has
    +            // registered, including this process.
    +            rc = zoo_get_children( ZHandle, masterMonitor.c_str( ), 0, 
&nodes );
    +            if ( nodes.count > 0 )
    +            {
    +                if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
    +                {
    +                    trace_printf( "%s@%d nodes.count=%d\n"
    +                                , method_name, __LINE__
    +                                , nodes.count );
    +                }
    +                found = true;
    +            }
    +            else
    +            {
    +                if (doWait == false)
    +                {
    +                    break;
    +                }
    +                usleep(1000000); // sleep for a second as to not overwhelm 
the system   
    +           retries++;
    +                continue;
    +            }
    +        }
    +         
    +        else  // error
    +        { 
    +       if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
    +            {
    +                trace_printf( "%s@%d Error (MasterMonitor) 
WaitForAndReturnMaster returned rc (%d), retries %d\n"
    --- End diff --
    
    I think we don't need 'WaitForAndReturnMaster' in the string - the 
method_name being printed already includes it.


---

Reply via email to