On Mon, Jun 18, 2007 at 01:55:12PM +0900, DAIKI MATSUDA wrote: > Hi, All > > I add the new function for heartbeat-2.0.8 and attached its patch file. > > The function is to apply the new timeout parameters ( keepalive, deadtime, > deadping, warntime ) without stopping the heartbeat services. Currently > heartbeat boot scripts supply the 'reload' or 'forcereload' function, but > it, they are same, does stop the services and the HA services are moved to > standby node, because its process kills the forked heartbeat processes and > clients ( crmd etc. ). > So, we think to without suspending the services make the changing parameters > to apply to driving nodes. Current feature is following. > 1. changing ha.cf file for 4 parameters > 2. send working parent heartbeat process signal SIGRTMAX (e.g. kill -s > SIGRTMAX `cat /var/run/heartbeat.pid` (Why do I choose SIGRTMAX? I do not > find the unused good signal.) > > As we research the heatbeat, it may be safety. And I want to listen to your > issues for patch and functions. > > Best Regards > MATSUDA, Daiki
Hi Matsuda-san, I've taken a look over your patch. Its very long, so I am not sure that I understand it all. One thing that I am particularly puzzled over is the purpose of maxdispatchdelay_array. Is this used to notify child processes when an update occurs? I've put a number of comments inline, mainly minor things relating to formating of the code. Other than that my only queries are: * Has it been tested? * If so, can you re-test it against the dev tree? * If at all possible could you split the patch up into smaller bits. Perhaps if you adopt some of my refactoring of existing code ideas they could be separate patches? * Do you propose to add a target to this to the init script? > diff -uNr heartbeat-2.0.8/heartbeat/config.c > heartbeat-2.0.8.orig/heartbeat/config.c > --- heartbeat-2.0.8/heartbeat/config.c 2007-01-12 11:57:05.000000000 > +0900 > +++ heartbeat-2.0.8.orig/heartbeat/config.c 2007-06-18 10:38:26.000000000 > +0900 > @@ -116,6 +116,33 @@ > static int set_normalpoll(const char *); > #endif > > +void modify_setmaxdispatchdelay_value( int ); > +void modify_setall_id_value( int ); > +void modify_channel_value( int ); > + > +/* from include/clplumbing/GSource_internal.h */ > +#define COMMON_STRUCTSTART \ > +GSource source; /* Common glib struct - must be 1st */ > \ > +unsigned magno; /* Magic number */ \ > +long maxdispatchms; /* Time limit for dispatch function */ \ > +long maxdispatchdelayms; /* Max delay before processing */ \ > +char detecttime[sizeof(longclock_t)]; \ > + /* Time last input detected */ \ > +void* udata; /* User-defined data */ > \ > +guint gsourceid; /* Source id of this source */ > \ > +const char * description; /* Description of this source */ \ > +GDestroyNotify dnotify > + > +/* from lib/clplumbing/GSource.c */ > +struct GTimeoutAppend { > + COMMON_STRUCTSTART; > + longclock_t nexttime; > + guint interval; > +}; Should GTimeoutAppend be moved into a header file rather than duplicated? > + > +extern GPtrArray *maxdispatchdelay_array; > +extern GPtrArray *setall_array; > +extern GArray *timeout_add_full_array; > > void hb_set_max_rexmit_delay(int); > /* > @@ -212,6 +239,7 @@ > int netstring_format = FALSE; > extern int UseApphbd; > GSList* del_node_list; > +extern int watchdog_timeout_ms; > > > static int islegaldirective(const char *directive); > @@ -781,6 +809,384 @@ > return(errcount ? HA_FAIL : HA_OK); > } > > +/* care for G_main_setmaxdispatchdelay() function */ > +void > +modify_setmaxdispatchdelay_value( int timeout_type ) > +{ > + int i; > + long value; > + > + switch( timeout_type ){ > + case 0: > + value = config->heartbeat_ms; > + break; > + case 1: > + value = config->deadtime_ms; > + break; > + case 2: > + value = config->warntime_ms; > + break; > + default: > + return; > + } > + > + for( i = 0; i < maxdispatchdelay_array->len; i++ ){ > + MaxDispatchDelay_Array_t *carray = g_ptr_array_index( > maxdispatchdelay_array, i ); > + > + if( carray->type == timeout_type ){ > + if( carray->mode == '*' ){ > + G_main_setmaxdispatchdelay( ( GSource * > )carray->addr, value * carray->value ); > + } else if( carray->mode == '/' ){ > + G_main_setmaxdispatchdelay( ( GSource * > )carray->addr, value / carray->value ); > + } > + } > + } > +} Could you reformat the code above a little bit so that it is <= 80col wide? > + > +/* care for Gmain_setall_id() function */ > +void > +modify_setall_id_value( int timeout_type ) > +{ > + int i; > + long value; > + > + switch( timeout_type ){ > + case 0: > + value = config->heartbeat_ms; > + break; > + case 1: > + value = config->deadtime_ms; > + break; > + case 2: > + value = config->warntime_ms; > + break; > + default: > + return; > + } The logic above seems to be a duplicate of the first half of modify_setmaxdispatchdelay_value(). Is there any value in breaking it out into its own (static?) function? > + > + for( i = 0; i < setall_array->len; i++ ){ > + SetAll_Array_t *sarray = g_ptr_array_index( setall_array, i ); > + > + if( sarray->type == timeout_type ){ > + if( sarray->calc[0] == '\0' ){ > + G_main_setmaxdispatchdelay_id( sarray->id, > value ); > + } else { > + char *ptr1 = sarray->calc; > + char *ptr2; > + unsigned long val = value; > + > + for( ; ; ){ > + char buf[2]; > + int num; > + > + ptr2 = strchr( ptr1, ' ' ); > + if( ptr2 != NULL ) ptr2 = strchr( > ++ptr2, ' ' ); > + > + if( ptr2 != NULL ){ > + *ptr2 = '\0'; > + } > + sscanf( ptr1, "%s %d", buf, &num ); > + switch( buf[0] ){ > + case '+': > + val += num; > + break; > + case '-': > + val -= num; > + break; > + case '*': > + val *= num; > + break; > + case '/': > + val /= num; > + break; > + default: > + break; > + } > + if( ptr2 == NULL ){ > + break; > + } else { > + *ptr2 = ' '; > + ptr1 = ptr2 + 1; > + } > + } > + G_main_setmaxdispatchdelay_id( sarray->id, val > ); > + } > + } > + } > +} Again, 80col please. Perhaps if you change the logic around a bit then making it narrower would be easier: for( i = 0; i < setall_array->len; i++ ){ SetAll_Array_t *sarray = g_ptr_array_index( setall_array, i ); if( sarray->type != timeout_type ) { continue; } if( sarray->calc[0] == '\0' ){ G_main_setmaxdispatchdelay_id( sarray->id, value ); continue; } for( ; ; ){ ... Also, I'm not a big fan of declaring variables other inside blocks, but I'm not sure what the linux-ha guidelines on this are. > + > +void > +modify_channel_value( int timeout_type ) > +{ > + modify_setmaxdispatchdelay_value( timeout_type ); > + modify_setall_id_value( timeout_type ); > +} > + > +int > +light_parse_config(const char * cfgfile ) The top half of light_parse_config() seems to contain a lot of code from parse_config() Is it possible to restructure or split parse_config() in order to reduce the volume of code duplication? > +{ > + FILE * f; > + char buf[MAXLINE]; > + char * cp; > + char directive[MAXLINE]; > + size_t dirlength; > + char option[MAXLINE]; > + size_t optionlength; > + int i; > + struct stat sbuf; > + > +#define OW_KEEPALIVE 1 > +#define OW_DEADTIME 2 > +#define OW_WARNTIME 4 > +#define OW_DEADPING 8 I'd rather if these defines were just done at the top of the file. There doesn't seem to be any pressing need to #undef them at any stage. > + int mode = 0; > + > + long new_keepalive_ms = 0; > + long new_deadtime_ms = 0; > + long new_warntime_ms = 0; > + long new_deadping_ms = -1; > + char *new_keepalive_string = NULL; > + char *new_deadtime_string = NULL; > + char *new_warntime_string = NULL; > + char *new_deadping_string = NULL; > + > + if ((f = fopen(cfgfile, "r")) == NULL) { > + ha_log(LOG_ERR, "Cannot open config file [%s]", cfgfile); > + ha_log(LOG_INFO > + , "An annotated sample %s file is provided in" > + " the documentation." > + , cfgfile); > + ha_log(LOG_INFO > + , "Please copy it to %s, read it, customize it" > + ", and try again." > + , cfgfile); > + > + return(HA_FAIL); > + } > + > + fstat(fileno(f), &sbuf); > + if( config->cfg_time == sbuf.st_mtime ){ > + ha_log( LOG_ERR, "The timestamp of %s is not changed!\n", > cfgfile ); > + return 1; > + } > + > + /* It's ugly, but effective */ > + > + while (fgets(buf, MAXLINE, f) != NULL) { > + char * bp = buf; The line above contains trailing whitespace. > + int IsOptionDirective=1; > + > + /* Skip over white space */ > + bp += strspn(bp, WHITESPACE); > + The line above contains trailing whitespace. > + /* Zap comments on the line */ > + if ((cp = strchr(bp, COMMENTCHAR)) != NULL) { > + *cp = EOS; > + } > + The line above contains trailing whitespace. > + /* Strip '\n' and '\r' chars */ > + if ((cp = strpbrk(bp, CRLF)) != NULL) { > + *cp = EOS; > + } > + > + /* Ignore blank (and comment) lines */ > + if (*bp == EOS) { > + continue; > + } > + > + /* Now we expect a directive name */ > + > + dirlength = strcspn(bp, WHITESPACE); > + strncpy(directive, bp, dirlength); > + directive[dirlength] = EOS; > + > + bp += dirlength; > + > + /* Skip over Delimiters */ > + bp += strspn(bp, DELIMS); > + > + /* Now Check for the options-list stuff */ > + while (IsOptionDirective && *bp != EOS) { > + optionlength = strcspn(bp, DELIMS); > + strncpy(option, bp, optionlength); > + option[optionlength] = EOS; > + bp += optionlength; > + > + if( ! strcmp( directive, KEY_KEEPALIVE ) ){ > + new_keepalive_ms = get_msec( option ); > + new_keepalive_string = strdup( option ); > + } else if( ! strcmp( directive, KEY_DEADTIME ) ){ > + new_deadtime_ms = get_msec( option ); > + new_deadtime_string = strdup( option ); > + } else if( ! strcmp( directive, KEY_WARNTIME ) ){ > + new_warntime_ms = get_msec( option ); > + new_warntime_string = strdup( option ); > + } else if( ! strcmp( directive, KEY_DEADPING ) ){ > + new_deadping_ms = get_msec( option ); > + new_deadping_string = strdup( option ); > + } > + /* Skip over Delimiters */ > + bp += strspn(bp, DELIMS); > + } > + } > + fclose( f ); > + > + if( new_keepalive_ms == 0 ){ > + ha_log( LOG_ERR, "keepalive value is not set.\n" ); > + return 2; > + } > + > + > + if( new_deadtime_ms == 0 ){ > + ha_log( LOG_ERR, "deadtime value is not set.\n" ); > + return 2; > + } > + > + if( new_deadtime_ms != config->deadtime_ms ) mode |= OW_DEADTIME; > + > + if( new_keepalive_ms != config->heartbeat_ms ) mode |= OW_KEEPALIVE; > + if( new_warntime_ms != config->warntime_ms ) mode |= OW_WARNTIME; > + if( new_deadping_ms != config->deadping_ms ) mode |= OW_DEADPING; > + > + if( new_deadtime_ms < new_keepalive_ms * 2 ){ > + ha_log( LOG_ERR, "deadtime value is smaller than keepalive time > * 2.\n" ); > + ha_log( LOG_ERR, "keepalive value: %ld\n", new_keepalive_ms ); > + ha_log( LOG_ERR, "deadtime value: %ld\n", new_deadtime_ms ); > + return 3; > + } > + > + if( new_deadping_ms < new_keepalive_ms * 2 ){ > + ha_log( LOG_ERR, "deadping value is smaller than keepalive time > * 2.\n" ); > + ha_log( LOG_ERR, "keepalive value: %ld\n", new_keepalive_ms ); > + ha_log( LOG_ERR, "deadping value: %ld\n", new_deadping_ms ); > + return 4; > + } > + > +/* Maybe following is safe */ Could you explain on what might be unsafe about it? > + if( mode & ( OW_KEEPALIVE | OW_DEADTIME ) ){ > + if( new_keepalive_ms > config->deadtime_ms ){ > + modify_deadtime_value( new_deadtime_ms, > new_deadtime_string ); > + free( new_deadtime_string ); > + modify_keepalive_value( new_keepalive_ms, > new_keepalive_string ); > + free( new_keepalive_string ); > + } else { > + modify_keepalive_value( new_keepalive_ms, > new_keepalive_string ); > + free( new_keepalive_string ); > + modify_deadtime_value( new_deadtime_ms, > new_deadtime_string ); > + free( new_deadtime_string ); > + } > + } else { > + if( mode & OW_KEEPALIVE ){ > + modify_keepalive_value( new_keepalive_ms, > new_keepalive_string ); > + free( new_keepalive_string ); > + } > + > + if( mode & OW_DEADTIME ){ > + modify_deadtime_value( new_deadtime_ms, > new_deadtime_string ); > + free( new_deadtime_string ); > + } > + } > + > + if( mode & OW_WARNTIME ){ > + SetParameterValue( KEY_WARNTIME, new_warntime_string ); > + config->warntime_ms = new_warntime_ms; > + modify_channel_value( 2 ); > + } > + > + if( mode & OW_DEADPING ){ > + SetParameterValue( KEY_DEADPING, new_deadping_string ); > + config->deadping_ms = new_deadping_ms; > + > + for( i = 0; i < config->nodecount; i++ ) { > + // config->nodes[i].has_resources = DoManageResources; > + if (config->nodes[i].nodetype == PINGNODE_I) { > + config->nodes[i].dead_ticks = msto_longclock( > new_deadping_ms ); > + } > + } > + > + modify_channel_value( 3 ); > + } > + > + return 0; > +#undef OW_KEEPALIVE > +#undef OW_DEADTIME > +#undef OW_WARNTIME > +} > + > +void > +modify_keepalive_value( long new_keepalive_ms, char *new_keepalive_string ) > +{ > + int i; > + > + cl_log( LOG_INFO, "%s: keepalive %ld time is over-written to new value > %ld.\n", __FUNCTION__, config->heartbeat_ms, new_keepalive_ms ); > + config->heartbeat_ms = new_keepalive_ms; > + SetParameterValue( KEY_KEEPALIVE, new_keepalive_string ); > + > + modify_channel_value( 0 ); > +#define GTIMEOUT(GS) ((struct GTimeoutAppend*)((void*)(GS))) > + for( i = 0; i < timeout_add_full_array->len; i++ ){ > +/* This part is cut from Gmain_timeout_add_full() in > lib/clplumbing/GSource.c */ > + struct GTimeoutAppend *append; > + guint id = g_array_index( timeout_add_full_array, guint, i ); > + GSource *source; > + > + source = g_main_context_find_source_by_id( NULL, id ); > + > + if( source != NULL ){ > + append = GTIMEOUT( source ); > + // append->nexttime = add_longclock( time_longclock(), > msto_longclock( config->heartbeat_ms ) ); > + append->interval = config->heartbeat_ms; There seems to be mixed tabs and spaces in the indentation of the line above. > + } > + } > + > +/* For ccm */ Could you indent the comment on the line above? > + { > + struct ha_msg * msg; > + char keepalive[64]; > + > + snprintf( keepalive, sizeof( keepalive ), "%lx", > config->heartbeat_ms ); > + msg = ha_msg_new( 4 ); > + ha_msg_add( msg, F_TYPE, "new keepalive" ); > + ha_msg_add( msg, F_ORIG, curnode->nodename ); > + ha_msg_add( msg, F_STATUS, "new keepalive" ); > + ha_msg_add( msg, F_KEEPALIVE, keepalive ); > + > + heartbeat_monitor( msg, KEEPIT, NULL ); > + } > +#undef GTIMEOUT > +} > + > +void > +modify_deadtime_value( long new_deadtime_ms, char *new_deadtime_string ) I haven't investigated this, but how is the deadtime set on startup? Would it make sense to abstract it a bit so that the same code can be used for startup and modify? Perhaps a modified modify_deadtime_value() could be used in both cases. Ditto for modify_keepalive_value() > +{ > + int i; > + > + cl_log( LOG_INFO, "%s: deadtime %ld time is over-written to new value > %ld.\n", __FUNCTION__, config->deadtime_ms, new_deadtime_ms ); > + SetParameterValue( KEY_DEADTIME, new_deadtime_string ); > + > +/* this part is taken from init_config() */ > + for( i = 0; i < config->nodecount; i++ ) { > + // config->nodes[i].has_resources = DoManageResources; If the line above isn't needed, please just remove it. > + if( config->nodes[i].nodetype != PINGNODE_I && strcmp( > config->nodes[i].nodename, curnode->nodename ) == 0 ){ > + config->nodes[i].dead_ticks = msto_longclock( > new_deadtime_ms ); > + } > + } > +/* To HERE */ > + > +/* for heartbeat.c hb_init_watchdog_interval() */ > + if( watchdog_timeout_ms == 0L || watchdog_timeout_ms == ( > config->deadtime_ms + 10 ) ) watchdog_timeout_ms = new_deadtime_ms + 10; > +/* To HERE */ > + config->deadtime_ms = new_deadtime_ms; > + > + modify_channel_value( 1 ); > + > +/* for heartbeat.c hb_reregister_with_apphbd() and hb_init_watchdog() */ > + if ( UseApphbd == TRUE ) { > + hb_init_register_with_apphbd_dummy(); > + } > +/* To HERE */ > +} > + Are the "To HERE" commends just debugging/development stuff? If so, could you remove them. Also, could you please indent the other comments in this function. > /* > * Dump the configuration file - as a configuration file :-) > * > diff -uNr heartbeat-2.0.8/heartbeat/hb_api.c > heartbeat-2.0.8.orig/heartbeat/hb_api.c > --- heartbeat-2.0.8/heartbeat/hb_api.c 2007-01-12 11:57:05.000000000 > +0900 > +++ heartbeat-2.0.8.orig/heartbeat/hb_api.c 2007-06-04 15:16:07.000000000 > +0900 > @@ -198,7 +198,7 @@ > seqno_t last_seq; > }; > > - > +GPtrArray *maxdispatchdelay_array; > > static int > should_msg_sendto_client(client_proc_t* client, struct ha_msg* msg) > @@ -1290,7 +1290,10 @@ > , APIclients_input_dispatch > , client, G_remove_client); > G_main_setdescription((GSource*)client->gsource, "API client"); > G_main_setmaxdispatchdelay((GSource*)client->gsource, > config->heartbeat_ms); > + > + maxdispatchdelay_array_add( ( void * )client->gsource, '*', 1, 0, 0 ); > + > G_main_setmaxdispatchtime((GSource*)client->gsource, 100); > if (ANYDEBUG) { > cl_log(LOG_DEBUG > diff -uNr heartbeat-2.0.8/heartbeat/hb_config.h > heartbeat-2.0.8.orig/heartbeat/hb_config.h > --- heartbeat-2.0.8/heartbeat/hb_config.h 2007-01-12 11:57:05.000000000 > +0900 > +++ heartbeat-2.0.8.orig/heartbeat/hb_config.h 2007-06-15 > 15:44:23.000000000 +0900 > @@ -29,7 +29,13 @@ > int add_node(const char * value, int nodetype); > int parse_authfile(void); > int init_config(const char * cfgfile); > +int light_parse_config( const char * cfgfile ); > +void modify_keepalive_value( long, char * ); > +void modify_deadtime_value( long, char * ); > int StringToBaud(const char * baudstr); > const char * GetParameterValue(const char * name); > +void hb_init_register_with_apphbd_dummy( void ); > +void maxdispatchdelay_array_add( void *addr, char mode, int value, > pid_t pid, int type ); > +void setall_array_add( long id, int type, char * calc ); > > #endif /* _HB_CONFIG_H */ > diff -uNr heartbeat-2.0.8/heartbeat/hb_rexmit.c > heartbeat-2.0.8.orig/heartbeat/hb_rexmit.c > --- heartbeat-2.0.8/heartbeat/hb_rexmit.c 2007-01-12 11:57:05.000000000 > +0900 > +++ heartbeat-2.0.8.orig/heartbeat/hb_rexmit.c 2007-05-31 > 11:29:24.000000000 +0900 > @@ -35,6 +35,9 @@ > #include <clplumbing/GSource.h> > #include <clplumbing/cl_random.h> > > +#include <hb_config.h> > + > +extern GPtrArray *setall_array; > > static void schedule_rexmit_request(struct node_info* node, seqno_t seq, > int delay); > > @@ -235,7 +240,9 @@ > > sourceid = Gmain_timeout_add_full(G_PRIORITY_HIGH - 1, delay, > send_rexmit_request, ri, NULL); > G_main_setall_id(sourceid, "retransmit request", > config->heartbeat_ms/2, 10); > + > + setall_array_add( sourceid, 0, strdup( "/ 2" ) ); > > if (sourceid == 0){ > cl_log(LOG_ERR, "%s: scheduling a timeout event failed", > diff -uNr heartbeat-2.0.8/heartbeat/hb_signal.c > heartbeat-2.0.8.orig/heartbeat/hb_signal.c > --- heartbeat-2.0.8/heartbeat/hb_signal.c 2007-01-12 11:57:05.000000000 > +0900 > +++ heartbeat-2.0.8.orig/heartbeat/hb_signal.c 2007-05-31 > 10:52:29.000000000 +0900 > @@ -60,7 +60,18 @@ > #define HB_SIG_PARENT_DEBUG_USR2_SIG 0x0020UL > #define HB_SIG_REREAD_CONFIG_SIG 0x0040UL > #define HB_SIG_FALSE_ALARM_SIG 0x0080UL > +#define HB_SIG_LIGHT_REREAD_CONFIG_SIG 0x0100UL > > +/* Maybe no need > +typedef struct MaxDispatchDelay_Array { > + GCHSource *addr; > + char mode; > + int value; > + int pid; > + int type; > +} MaxDispatchDelay_Array_t; Please use tabs not spaces for indentation :) > +extern GPtrArray *maxdispatchdelay_array; > +*/ > > /* > * This function does NOT have the same semantics as setting SIG_IGN. > @@ -298,6 +310,9 @@ > void > hb_signal_reread_config_action(void) > { > +/* Maybe no need > + int i; > +*/ > int j; > int signal_children = 0; > > @@ -347,6 +363,19 @@ > return_to_orig_privs(); > for (j=0; j < procinfo->nprocs; ++j) { > if (procinfo->info+j != curproc) { > +/* Maybe no need > + for( i = 0; ; i++ ){ > + MaxDispatchDelay_Array_t *carray = > g_ptr_array_index( maxdispatchdelay_array, i ); > + if( carray == NULL ) break; > + if( carray->pid == > procinfo->info[j].pid ){ > + ha_log( LOG_INFO, "Remove > infomation from maxdispatchdelay_array mem = %x, pid = %d\n", ( int )carray, > carray->pid ); > + g_ptr_array_remove( > maxdispatchdelay_array, carray ); > + free( carray ); > + i--; > + } > + } > +*/ It looks like it is needed to me. But if not, can you just remove it? > + ha_log( LOG_INFO, "SP: CL_KILL > procinfo->info[%d].pid = %d, getpid = %d by SIGHUP", j, ( int > )procinfo->info[j].pid, ( int )getpid() ); > CL_KILL(procinfo->info[j].pid, SIGHUP); > } > } > @@ -354,6 +383,18 @@ > } > } > > +void > +hb_signal_light_reread_config_handler( int sig ) > +{ > + __hb_signal_pending |= HB_SIG_LIGHT_REREAD_CONFIG_SIG; > +} > + > +void > +hb_signal_light_reread_config_action( void ) > +{ > + light_parse_config( CONFIG_NAME ); > + return; > +} > > void > hb_signal_false_alarm_handler(int sig) > @@ -444,6 +485,10 @@ > hb_signal_reread_config_action(); > } > > + if (handlers&HB_SIG_LIGHT_REREAD_CONFIG_SIG) { > + hb_signal_light_reread_config_action(); > + } > + > if (handlers&HB_SIG_FALSE_ALARM_SIG) { > hb_signal_false_alarm_action(); > } > @@ -478,6 +523,7 @@ > , {SIGALRM, hb_signal_false_alarm_handler, 1} > , {SIGUSR1, hb_signal_debug_usr1_handler, 1} > , {SIGUSR2, hb_signal_debug_usr2_handler, 1} > + , {SIGRTMAX, hb_signal_light_reread_config_handler, 1} Is using SIGRTMAX in this way safe? Is SIGHUP already used for something else? > , {0, 0, 0} > }; > > @@ -634,6 +680,7 @@ > { {SIGTERM, hb_signal_term_handler, 1} > , {SIGUSR1, parent_hb_signal_debug_usr1_handler, 1} > , {SIGUSR2, parent_hb_signal_debug_usr2_handler, 1} > + , {SIGRTMAX, hb_signal_light_reread_config_handler, 1} > , {SIGALRM, hb_signal_false_alarm_handler, 1} > , {0, 0, 0} > }; > diff -uNr heartbeat-2.0.8/heartbeat/hb_signal.h > heartbeat-2.0.8.orig/heartbeat/hb_signal.h > --- heartbeat-2.0.8/heartbeat/hb_signal.h 2007-01-12 11:57:05.000000000 > +0900 > +++ heartbeat-2.0.8.orig/heartbeat/hb_signal.h 2007-05-31 > 10:52:29.000000000 +0900 > @@ -57,6 +57,10 @@ > > void hb_signal_reread_config_action(void); > > +void hb_signal_light_reread_config_handler(int sig); > + > +void hb_signal_light_reread_config_action(void); > + > void hb_signal_false_alarm_handler(int sig); > > void hb_signal_false_alarm_action(void); > diff -uNr heartbeat-2.0.8/heartbeat/heartbeat.c > heartbeat-2.0.8.orig/heartbeat/heartbeat.c > --- heartbeat-2.0.8/heartbeat/heartbeat.c 2007-01-12 11:57:05.000000000 > +0900 > +++ heartbeat-2.0.8.orig/heartbeat/heartbeat.c 2007-06-18 > 10:46:14.000000000 +0900 > @@ -316,7 +317,8 @@ > > char * watchdogdev = NULL; > static int watchdogfd = -1; > -static int watchdog_timeout_ms = 0L; > +// static int watchdog_timeout_ms = 0L; > +int watchdog_timeout_ms = 0L; > > int shutdown_in_progress = FALSE; > int startup_complete = FALSE; > @@ -342,6 +344,9 @@ > GTRIGSource* write_delcachefile = NULL; > extern GSList* del_node_list; > > +GPtrArray *maxdispatchdelay_array; > +GPtrArray *setall_array; > +GArray * timeout_add_full_array; > > #undef DO_AUDITXMITHIST > #ifdef DO_AUDITXMITHIST > @@ -490,6 +495,33 @@ > NULL, > }; > > +void > +maxdispatchdelay_array_add( void *addr, char mode, int value, pid_t pid, int > type ) > +{ > + MaxDispatchDelay_Array_t *carray; > + > + carray = malloc( sizeof( MaxDispatchDelay_Array_t ) ); Should this (and others) be cl_malloc()? > + carray->addr = addr; > + carray->mode = mode; > + carray->value = value; > + > + carray->pid = pid; > + carray->type = type; > + g_ptr_array_add( maxdispatchdelay_array, ( gpointer )carray ); > +} > + > +void > +setall_array_add( long id, int type, char * calc ) > +{ > + SetAll_Array_t *sarray; > + > + sarray = malloc( sizeof( SetAll_Array_t ) ); > + sarray->id = id; > + sarray->type = type; > + sarray->calc = calc; > + g_ptr_array_add( setall_array, ( gpointer )sarray ); > +} > + > static void > init_procinfo() > { > @@ -747,10 +779,13 @@ > FifoChildSource = G_main_add_IPC_Channel(PRI_FIFOMSG > , fifochildipc[P_READFD] > , FALSE, FIFO_child_msg_dispatch, NULL, NULL); > + cl_log( LOG_INFO, "%s use config->heartbeat_ms: %ld\n", __FUNCTION__, > config->heartbeat_ms ); > G_main_setmaxdispatchdelay((GSource*)FifoChildSource, > config->heartbeat_ms); > G_main_setmaxdispatchtime((GSource*)FifoChildSource, 50); > G_main_setdescription((GSource*)FifoChildSource, "FIFO"); > - > + > + maxdispatchdelay_array_add( ( void * )FifoChildSource, '*', 1, pid, 0 ); > + > return HA_OK; > } > > @@ -1462,6 +1503,7 @@ > G_main_setmaxdispatchtime((GSource*)s, 50); > G_main_setdescription((GSource*)s, "write child"); > > + maxdispatchdelay_array_add( ( void * )s, '/', 4, > sysmedia[j]->wchan[P_WRITEFD]->farside_pid, 0 ); > > /* Connect up the read child IPC channel... */ > s = G_main_add_IPC_Channel(PRI_READPKT > @@ -1474,8 +1518,8 @@ > G_main_setmaxdispatchtime((GSource*)s, 50); > G_main_setdescription((GSource*)s, "read child"); > > -} > - > + maxdispatchdelay_array_add( ( void * )s, '/', 4, > sysmedia[j]->rchan[P_WRITEFD]->farside_pid, 0 ); > + } > > /* > * Things to do on a periodic basis... > @@ -1486,6 +1533,9 @@ > , hb_send_local_status, NULL, NULL); > G_main_setall_id(id, "send local status", 10+config->heartbeat_ms/2, > 50); > > + g_array_append_val( timeout_add_full_array, id ); > + setall_array_add( id, 0, strdup( "/ 2 + 10" ) ); > + > id=Gmain_timeout_add_full(PRI_AUDITCLIENT > , config->initial_deadtime_ms > , set_init_deadtime_passed_flag > @@ -1493,6 +1544,8 @@ > , NULL); > G_main_setall_id(id, "init deadtime passed", config->warntime_ms, 50); > > + setall_array_add( id, 2, strdup( "" ) ); > + > /* Dump out memory stats periodically... */ > memstatsinterval = (debug_level ? 10*60*1000 : ONEDAY*1000); > id=Gmain_timeout_add_full(PRI_DUMPSTATS, memstatsinterval > @@ -1515,11 +1568,17 @@ > id=Gmain_timeout_add_full(PRI_CHECKSIGS, config->heartbeat_ms > , Gmain_hb_signal_process_pending, NULL, NULL); > G_main_setall_id(id, "check for signals", 10+config->heartbeat_ms/2, > 50); > > + cl_log( LOG_INFO, "heartbeat.c: G_main_timeout_add_full %d\n", id ); > + g_array_append_val( timeout_add_full_array, id ); > + setall_array_add( id, 0, strdup( "/ 2 + 10" ) ); > + > id=Gmain_timeout_add_full(PRI_FREEMSG, 500 > , Gmain_update_msgfree_count, NULL, NULL); > G_main_setall_id(id, "update msgfree count", config->deadtime_ms, 50); > > + setall_array_add( id, 1, strdup( "" ) ); > + > if (UseApphbd) { > Gmain_timeout_add_full(PRI_DUMPSTATS > , 60*(1000-10) /* Not quite on a minute boundary */ > @@ -1817,7 +1875,8 @@ > G_main_setmaxdispatchdelay((GSource*)regsource, config->deadtime_ms); > G_main_setmaxdispatchtime((GSource*)regsource, 20); > G_main_setdescription((GSource*)regsource, "client registration"); > > + maxdispatchdelay_array_add( ( void * )regsource, '*', 1, 0, 1 ); > > if (regsource == NULL) { > cl_log(LOG_DEBUG > @@ -1908,6 +1967,8 @@ > void > hb_initiate_shutdown(int quickshutdown) > { > + int i; > + > if (ANYDEBUG) { > cl_log(LOG_DEBUG, "hb_initiate_shutdown() called."); > } > @@ -1930,6 +1991,24 @@ > , "Shutdown delayed until Communication is up."); > return; > } > + > +/* > + for( ; ; ){ > + MaxDispatchDelay_Array_t *carray = g_ptr_array_index( > maxdispatchdelay_array, 0 ); > + if( carray == NULL ) break; > + g_ptr_array_remove( maxdispatchdelay_array, carray ); > + free( carray ); > + } > +*/ If the code above isn't needed, please remove it. > + g_ptr_array_free( maxdispatchdelay_array, TRUE ); > + > + for( i = 0; i < setall_array->len; i++ ){ > + SetAll_Array_t *sarray = g_ptr_array_index( setall_array, i ); > + if( sarray != NULL ) free( sarray->calc ); > + } > + g_ptr_array_free( setall_array, TRUE ); > + g_array_free( timeout_add_full_array, TRUE ); > + > send_local_status(); > if (!quickshutdown && DoManageResources) { > /* THIS IS RESOURCE WORK! FIXME */ > @@ -4079,7 +4166,10 @@ > send_cluster_msg(msg); > > id = Gmain_timeout_add(1000, send_reqnodes_msg, (gpointer)i); > G_main_setall_id(id, "send_reqnodes_msg", config->heartbeat_ms, 100); > + > + setall_array_add( id, 0, strdup( "" ) ); > + > return FALSE; > } > > @@ -4694,6 +4787,9 @@ > /*init table for nodename/uuid lookup*/ > inittable(); > > + maxdispatchdelay_array = g_ptr_array_new(); > + setall_array = g_ptr_array_new(); > + timeout_add_full_array = g_array_new( FALSE, FALSE, sizeof( guint ) ); > > /* > * We think we just performed an "exec" of ourselves to restart. > @@ -5024,6 +5121,11 @@ > } > } > > +void hb_init_register_with_apphbd_dummy( void ) > +{ > + hb_init_register_with_apphbd(); > +} > + > static gboolean > hb_reregister_with_apphbd(gpointer dummy) > { > diff -uNr heartbeat-2.0.8/include/heartbeat.h > heartbeat-2.0.8.orig/include/heartbeat.h > --- heartbeat-2.0.8/include/heartbeat.h 2007-01-12 11:57:05.000000000 > +0900 > +++ heartbeat-2.0.8.orig/include/heartbeat.h 2007-05-31 10:50:25.000000000 > +0900 > @@ -370,6 +370,20 @@ > char* path; /* Path (argv[0])? */ > }; > > +typedef struct MaxDispatchDelay_Array { > + void *addr; > + char mode; > + int value; > + int pid; > + int type; > +} MaxDispatchDelay_Array_t; > + > +typedef struct SetAll_Array { > + unsigned long id; > + int type; > + char * calc; > +} SetAll_Array_t; > + > int api_remove_client_pid(pid_t c_pid, const char * reason); > > > diff -uNr heartbeat-2.0.8/lib/clplumbing/GSource.c > heartbeat-2.0.8.orig/lib/clplumbing/GSource.c > --- heartbeat-2.0.8/lib/clplumbing/GSource.c 2007-01-12 11:57:07.000000000 > +0900 > +++ heartbeat-2.0.8.orig/lib/clplumbing/GSource.c 2007-05-31 > 10:46:30.000000000 +0900 > @@ -1599,7 +1599,8 @@ > { > G_main_setdescription_id(id, description); > G_main_setmaxdispatchdelay_id(id, delay); > - G_main_setmaxdispatchtime_id(id, delay); > + // G_main_setmaxdispatchtime_id(id, delay); > + G_main_setmaxdispatchtime_id(id, elapsed); > } The GSource.c fragment above is already in the dev tree :) > static void TempProcessRegistered(ProcTrack* p); > diff -uNr heartbeat-2.0.8/membership/ccm/ccm.c > heartbeat-2.0.8.orig/membership/ccm/ccm.c > --- heartbeat-2.0.8/membership/ccm/ccm.c 2007-01-12 11:57:08.000000000 > +0900 > +++ heartbeat-2.0.8.orig/membership/ccm/ccm.c 2007-06-13 11:07:52.000000000 > +0900 > @@ -113,6 +113,9 @@ > return HA_OK; > } > > +extern ccm_info_t* ccm_info_saved; > +extern ll_cluster_t* hb_fd_saved; > + > int > ccm_control_process(ccm_info_t *info, ll_cluster_t * hb) > { > @@ -149,6 +152,13 @@ > } > ha_msg_del(msg); > msg = newmsg; > + } else if(strcasecmp(type, "new keepalive") == 0){ > + hb_fd_saved->llc_ops->signoff( hb_fd_saved, FALSE ); > + hb_fd_saved->llc_ops->signon( hb_fd_saved, "ccm" ); > + sleep( 3 ); What is the purpose of sleep(3). It seems strange that it is hard-coded. > + > + ccm_configure_timeout( hb_fd_saved, ccm_info_saved ); > + return TRUE; > } else if(strcasecmp(type, T_STATUS) == 0){ > const char* nodetype; > const char* site; > diff -uNr heartbeat-2.0.8/membership/ccm/ccm.h > heartbeat-2.0.8.orig/membership/ccm/ccm.h > --- heartbeat-2.0.8/membership/ccm/ccm.h 2007-01-12 11:57:08.000000000 > +0900 > +++ heartbeat-2.0.8.orig/membership/ccm/ccm.h 2007-06-08 14:20:24.000000000 > +0900 > @@ -469,6 +469,8 @@ > int has_quorum; /* -1, not set, 0, no quorum, 1, has > quorum */ > } ccm_info_t; > > +void ccm_configure_timeout(ll_cluster_t *, ccm_info_t *); > + > /* > * datastructure passed to the event loop. > * This acts a handle, and should not be interpreted > diff -uNr heartbeat-2.0.8/membership/ccm/ccm_statemachine.c > heartbeat-2.0.8.orig/membership/ccm/ccm_statemachine.c > --- heartbeat-2.0.8/membership/ccm/ccm_statemachine.c 2007-01-12 > 11:57:08.000000000 +0900 > +++ heartbeat-2.0.8.orig/membership/ccm/ccm_statemachine.c 2007-06-18 > 11:36:08.000000000 +0900 > @@ -201,7 +201,7 @@ > /* */ > /* timeout configuration function */ > /* */ > -static void > +void > ccm_configure_timeout(ll_cluster_t *hb, ccm_info_t *info) > { > long keepalive = hb->llc_ops->get_keepalive(hb); > _______________________________________________________ > Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org > http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev > Home Page: http://linux-ha.org/ _______________________________________________________ Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev Home Page: http://linux-ha.org/