Hi Alan,
I haven't run this with valgrind yet.
On the other hand, I saw some code in send_ha_message() in crm that checks 
whether the channel is connected before sending. I'm testing to see whether 
this would help.
 
Thanks,
Phong


________________________________
 From: Alan Robertson <[email protected]>
To: Nguyen Dinh Phong <[email protected]>; High-Availability Linux Development 
List <[email protected]> 
Sent: Wednesday, April 18, 2012 10:13 PM
Subject: Re: [Linux-ha-dev] Core Dump When Sending to Other Node That's 
Resetting
 

Have you tried running this under valgrind?

On 04/13/2012 05:22 PM, Nguyen Dinh Phong wrote: 
Hi,
>I wrote a wrapper using hbclient api for an application that manages the 
>redundancy of our system. The application uses the wrapper to send/receive 
>messages (string) between the primary and secondary.
>In our testing of reset and switchover, once in a while there is a core dump 
>in the send path with a double free in libc, and I do not know whether it is 
>caused by my wrapper around the hbclient API.
>
>
>
>/lib/libc.so.6[0xf7d71629]
>/lib/libc.so.6(cfree+0x59)[0xf7d719e9]
>/usr/lib/libplumb.so.2[0xf7e88dcf]
>/usr/lib/libplumb.so.2[0xf7e9a03e]
>/usr/lib/libplumb.so.2[0xf7e9a1a4]
>/usr/lib/libplumb.so.2[0xf7e9922f]
>/usr/lib/libplumb.so.2(msg2ipcchan+0xb8)[0xf7e891ea]
>/usr/lib/libhbclient.so.1[0xf7e6a736]
>/usr/lib/libha_lib.so(hb_send+0x204)[0xf7e61e15] ---> my wrapper
>
>
>I use send_ordered_nodemsg() to send and readmsg() to read (based on 
>api_test.c). However, in the sample code of ipfail and drbd, I saw the setup 
>of an IPChannel and the use of msg2ipcchan(). Which is more appropriate?
>
>
>
>I'd also like to know whether I should add more code to handle node status 
>changes, because the crashes always occur when the other node is resetting.
>
>
>Snippet of my codes:
>
>1. Initialization:
>if (mhm_hb->llc_ops->signon(mhm_hb, "ping")!= HA_OK) {    // I pasted the 
>common "ping", 
>                                                                               
>                                     //  plan to change to different name
>    cl_log(LOG_ERR, "Cannot sign on with heartbeat");
>...
>
>2. Send:
>int hb_send(ll_cluster_t *hb, char *dest, void *buf, size_t
            sz)
>{
>  HA_Message *msg;
>  if (hb==NULL) return HA_FAIL;
>  msg = ha_msg_new(0);
>  if (ha_msg_add(msg, F_TYPE, T_MHM_MSG) != HA_OK) {
>    cl_log(LOG_ERR, "hb_send: cannot add field TYPE\n");
>    ZAPMSG(msg);
>    return HA_FAIL;
>  }
>  if (ha_msg_add(msg, F_ORIG, node_name) != HA_OK) {
>    cl_log(LOG_ERR, "hb_send: cannot add field ORIG\n");
>    ZAPMSG(msg);
>    return HA_FAIL;
>  }
>  char *payload = malloc(sz+1);
>  if (payload==NULL) {
>    ZAPMSG(msg);
>    return HA_FAIL;
>  }
>  memset(payload, 0, sz+1);    // Add a Null byte at the end
>  memcpy(payload, buf, sz);
>  if (ha_msg_add(msg, F_MHM_PAYLOAD, payload) != HA_OK) {
>    cl_log(LOG_ERR, "hb_send: cannot add field PAYLOAD\n");
>    ZAPMSG(msg);
>    return HA_FAIL;
>  }
>  if (hb->llc_ops->send_ordered_nodemsg(hb, msg,
            peer_name) != HA_OK) {
>    ZAPMSG(msg);
>    return HA_FAIL;
>  }
>  else {
>    ZAPMSG(msg);
>    return sz;
>  }
>}
>
>3. Receive:
>int hb_recv(ll_cluster_t *hb, void *buf, size_t sz)
>{
>    int msgcount=0;
>    HA_Message *reply;
>
>    if (hb==NULL) return HA_FAIL;
>    memset(buf, 0, sz);
>    for(; (reply=hb->llc_ops->readmsg(hb, 1)) !=
            NULL;) { ----> Blocking receiving
>        const char *    type;
>        const char *    orig;
>        const char *payload;
>        ++msgcount;
>        if ((type = ha_msg_value(reply, F_TYPE)) == NULL) {
>            type = "?";
>        }
>        if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) {
>            orig = "?";
>        }
>        cl_log(LOG_DEBUG, "Got message %d of type [%s] from
            [%s]"
>        ,    msgcount, type, orig);
>        if (strcmp(type, T_MHM_MSG) == 0) {
>          payload = ha_msg_value(reply, F_MHM_PAYLOAD);
>          
>          int p_sz = strlen(payload);
>          cl_log(LOG_DEBUG, "payload %s sz %d p_sz %d\n",
            payload, sz, p_sz);
>                    
>          if (p_sz <= sz) {
>            char *tmp = (char*) buf;
>            strncpy(tmp, payload, p_sz);
>            cl_log(LOG_DEBUG, "return buf %s sz %d ret_val
            %d", buf, strlen(buf), p_sz);
>            ZAPMSG(reply);
>            return(p_sz);
>          } else {
>            cl_log(LOG_ERR, "Receive buffer %d too small for
            payload %d", sz, p_sz);
>            ZAPMSG(reply);
>            return HA_FAIL;
>          }
>        }
>        ZAPMSG(reply);    //// ---> Can we delete a message that's not meant 
>for our module, or should we let it go? 
>    }
>    if (reply==NULL) {
>      cl_log(LOG_ERR, "read_hb_msg returned NULL");
>      cl_log(LOG_ERR, "REASON: %s",
            hb->llc_ops->errmsg(hb));
>    }
>    return 0;
>}
>
>
>Thanks,
>Phong
>
>
>
>
>_______________________________________________________
Linux-HA-Dev: [email protected] 
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev Home Page: 
http://linux-ha.org/ 


--  Alan Robertson <[email protected]> - @OSSAlanR "Openness is the foundation and 
preservative of friendship...  Let me claim from you at all times your 
undisguised opinions." - William Wilberforce 
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to