Hi Yan,

Thank you for your comment.

> > Hi Lars,
> > Hi All,
> > 
> > A cause to be delayed became clear.
> > 
> > This problem occurs by a timing.
> > 
> > When hbagent receives F_STATUS message while hbagent waits for a reply of 
> > the api communication, 
> Under this circumstance, is there a specific heartbeat op that hbagent
> is waiting for?

Yes.

However, the F_STATUS message that hbagent queues is one from a very early 
stage.
I will pinpoint which hb_api call of hbagent it is.

When I made the following modification, the log below showed the queueing.

(snip)
/*
 * Read an API message.  All other messages are enqueued to be read later.
 *
 * Waits on pi->chan (via ops->waitin) and reads messages until one whose
 * F_TYPE field equals T_APIRESP arrives; that message is returned to the
 * caller.  Every other message is handed to enqueue_msg().
 *
 * Returns NULL when the channel status is IPC_DISCONNECT.
 */
static struct ha_msg *
read_api_msg(llc_private_t* pi)
{
        for (;;) {
                struct ha_msg*  msg;
                const char *    type;

                pi->chan->ops->waitin(pi->chan);
                if (pi->chan->ch_status == IPC_DISCONNECT) {
                        break;
                }
                if ((msg = msgfromIPC(pi->chan, 0)) == NULL) {
                        ha_api_perror("read_api_msg: "
                                      "Cannot read reply from IPC channel");
                        continue;
                }
                /* Fetch the type once; it may be NULL if the field is absent */
                type = ha_msg_value(msg, F_TYPE);
                if (type != NULL && strcmp(type, T_APIRESP) == 0) {
                        return(msg);
                }
                /* Got an unexpected non-api message */
                /* Queue it up for reading later */

                /*
                 * yamauchi: debug trace of status messages that get queued.
                 * Guard against a NULL type so that a message without
                 * F_TYPE is not passed to strcasecmp().
                 */
                if (type != NULL && strcasecmp(type, T_STATUS) == 0) {
                        cl_log(LOG_INFO, "##### yamuchi enqure_msg ()#####");
                        cl_log_message(LOG_INFO, msg);
                }
                enqueue_msg(pi, msg);
        }
        /* Reached only via the IPC_DISCONNECT break above */
        return(NULL);
}

(snip)
Jul 27 19:13:50 srv01 ccm: [5432]: info: ##### yamuchi enqure_msg ()#####
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG: Dumping message with 12 fields
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[0] : [t=status]
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[1] : [st=active]
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[2] : [dt=6590]
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[3] : [protocol=1]
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[4] : [src=srv02]
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[5] : [(1)srcuuid=0xa006540(36 27)]
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[6] : [seq=6]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: ##### yamuchi enqure_msg 
()#####
Jul 27 19:13:50 srv01 stonithd: [5435]: info: ##### yamuchi enqure_msg ()#####
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[7] : [hg=4ddb3648]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG: Dumping message with 12 
fields
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG: Dumping message with 12 
fields
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[8] : [ts=4e2fe4dd]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[0] : [t=status]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[0] : [t=status]
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[9] : [ld=0.04 0.12 0.15 3/89 5394]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[1] : [st=active]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[1] : [st=active]
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[10] : [ttl=3]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[2] : [dt=6590]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[2] : [dt=6590]
Jul 27 19:13:50 srv01 ccm: [5432]: info: MSG[11] : [auth=1 
69619762aa14655cdccd9778ec4c4861a15a0f19]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[3] : [protocol=1]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[3] : [protocol=1]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[4] : [src=srv02]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[4] : [src=srv02]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[5] : 
[(1)srcuuid=0x84255e0(36 27)]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[5] : [(1)srcuuid=0x83b7bf8(36 
27)]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[6] : [seq=6]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[6] : [seq=6]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[7] : [hg=4ddb3648]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[7] : [hg=4ddb3648]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[8] : [ts=4e2fe4dd]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[8] : [ts=4e2fe4dd]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[9] : [ld=0.04 0.12 0.15 
3/89 5394]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[9] : [ld=0.04 0.12 0.15 3/89 
5394]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[10] : [ttl=3]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[10] : [ttl=3]
Jul 27 19:13:50 srv01 lha-snmpagent: [5438]: info: MSG[11] : [auth=1 
69619762aa14655cdccd9778ec4c4861a15a0f19]
Jul 27 19:13:50 srv01 stonithd: [5435]: info: MSG[11] : [auth=1 
69619762aa14655cdccd9778ec4c4861a15a0f19]
(snip)
Jul 27 19:13:52 srv01 cib: [5433]: info: ##### yamuchi enqure_msg ()#####
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG: Dumping message with 12 fields
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[0] : [t=status]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[1] : [st=active]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[2] : [dt=6590]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[3] : [protocol=1]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[4] : [src=srv02]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[5] : [(1)srcuuid=0x8fc9060(36 27)]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[6] : [seq=6]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[7] : [hg=4ddb3648]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[8] : [ts=4e2fe4dd]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[9] : [ld=0.04 0.12 0.15 3/89 5394]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[10] : [ttl=3]
Jul 27 19:13:52 srv01 cib: [5433]: info: MSG[11] : [auth=1 
69619762aa14655cdccd9778ec4c4861a15a0f19]
(snip)


> 
> > F_STATUS is performed queueing of.
> > 
> > When hbagent caught the event from Heartbeat, this message is handled.
> > Therefore, it is handled at the time of events such as one down of the 
> > inter-connect.
> > 
> > Therefore, the active trap of the node is transmitted when inter-connect 
> > fell.
> > 
> > /*
> >  * Read an API message.  All other messages are enqueued to be read later.
> >  */
> > static struct ha_msg *
> > read_api_msg(llc_private_t* pi)
> > {
> > 
> >     for (;;) {
> >         struct ha_msg*    msg;
> >         const char *    type;
> >         
> >         pi->chan->ops->waitin(pi->chan);
> >         if (pi->chan->ch_status  == IPC_DISCONNECT){
> >             break;
> >         }
> >         if ((msg=msgfromIPC(pi->chan, 0)) == NULL) {
> >             ha_api_perror("read_api_msg: "
> >                       "Cannot read reply from IPC channel");
> >             continue;
> >         }
> >         if ((type=ha_msg_value(msg, F_TYPE)) != NULL
> >         &&    strcmp(type, T_APIRESP) == 0) {
> >             return(msg);
> >         }
> >         /* Got an unexpected non-api message */
> >         /* Queue it up for reading later */
> >         enqueue_msg(pi, msg);
> >     }
> >     /*NOTREACHED*/
> >     return(NULL);
> > }
> > 
> > 
> > 
> > I think that the following correction is necessary.
> > snmp_subagent/hbagent.c
> > (snip)
> >                         } else {
> > 
> >                                 /* snmp request */
> >                                 snmp_read(&fdset);
> > 
> >                                 ret = handle_heartbeat_msg(); ----> read 
> >queueing msg.!!
> >                         }
> > (snip)
> I'm still confused about invoking handle_heartbreat_msg() when select()
> finds that the SNMP socket has input. Is it an appropriate timing?

Sorry....

This correction is only one example.
Because I do not know much about how hbagent handles these messages, I would 
appreciate your guidance on the correct fix.

Best Regards,
Hideo Yamauchi.

> 
> Regards,
>   Yan
> -- 
> Gao,Yan <[email protected]>
> Software Engineer
> China Server Team, SUSE.
> _______________________________________________
> Linux-HA mailing list
> [email protected]
> http://lists.linux-ha.org/mailman/listinfo/linux-ha
> See also: http://linux-ha.org/ReportingProblems
> 
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to