Honza,

Great work

Make sure to add a Reviewed-by line with my name to the check-in.

Regards
-steve

On 12/02/2010 07:05 AM, Jan Friesse wrote:
> This typically happens when a local firewall is enabled. The patch adds a
> new statistics item called continuous_gather, which counts how many times
> the gather state has been entered consecutively. If this number is greater
> than MAX_NO_CONT_GATHER, a warning message is displayed. The counter is also
> checked on exit, so corosync can now be stopped even with a firewall enabled.
> 
> Signed-off-by: Jan Friesse <[email protected]>
> ---
>  exec/main.c                    |   15 +++++++++++++++
>  exec/totemsrp.c                |   15 +++++++++++++++
>  include/corosync/totem/totem.h |    6 ++++++
>  3 files changed, 36 insertions(+), 0 deletions(-)
> 
> diff --git a/exec/main.c b/exec/main.c
> index cd6cb83..b04f503 100644
> --- a/exec/main.c
> +++ b/exec/main.c
> @@ -198,8 +198,17 @@ void corosync_shutdown_request (void)
>  
>  static void *corosync_exit_thread_handler (void *arg)
>  {
> +     totempg_stats_t * stats;
> +
>       sem_wait (&corosync_exit_sem);
>  
> +     stats = api->totem_get_stats();
> +     if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
> +         stats->mrp->srp->operational_entered == 0) {
> +             unlink_all_completed ();
> +             /* NOTREACHED */
> +     }
> +
>       corosync_service_unlink_all (api, unlink_all_completed);
>  
>       return arg;
> @@ -626,6 +635,9 @@ static void corosync_totem_stats_updater (void *data)
>       objdb->object_key_replace (stats->mrp->srp->hdr.handle,
>               "rx_msg_dropped", strlen("rx_msg_dropped"),
>               &stats->mrp->srp->rx_msg_dropped, sizeof 
> (stats->mrp->srp->rx_msg_dropped));
> +     objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> +             "continuous_gather", strlen("continuous_gather"),
> +             &stats->mrp->srp->continuous_gather, sizeof 
> (stats->mrp->srp->continuous_gather));
>  
>       total_mtt_rx_token = 0;
>       total_token_holdtime = 0;
> @@ -784,6 +796,9 @@ static void corosync_totem_stats_init (void)
>               objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
>                       "rx_msg_dropped", &zero_64,
>                       sizeof (zero_64), OBJDB_VALUETYPE_UINT64);
> +             objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
> +                     "continuous_gather", &zero_32,
> +                     sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
>  
>       }
>       /* start stats timer */
> diff --git a/exec/totemsrp.c b/exec/totemsrp.c
> index f7a6638..c9ad391 100644
> --- a/exec/totemsrp.c
> +++ b/exec/totemsrp.c
> @@ -502,6 +502,7 @@ struct totemsrp_instance {
>       struct memb_commit_token *commit_token;
>  
>       totemsrp_stats_t stats;
> +
>       void * token_recv_event_handle;
>       void * token_sent_event_handle;
>       char commit_token_storage[9000];
> @@ -1789,6 +1790,8 @@ static void memb_state_operational_enter (struct 
> totemsrp_instance *instance)
>       instance->memb_state = MEMB_STATE_OPERATIONAL;
>  
>       instance->stats.operational_entered++;
> +     instance->stats.continuous_gather = 0;
> +
>       instance->my_received_flg = 1;
>  
>       reset_pause_timeout (instance);
> @@ -1853,6 +1856,15 @@ static void memb_state_gather_enter (
>  
>       instance->memb_state = MEMB_STATE_GATHER;
>       instance->stats.gather_entered++;
> +     instance->stats.continuous_gather++;
> +
> +     if (instance->stats.continuous_gather > MAX_NO_CONT_GATHER) {
> +             log_printf (instance->totemsrp_log_level_warning,
> +                     "Totem is unable to form a cluster because of an "
> +                     "operating system or network fault. The most common "
> +                     "cause of this message is that the local firewall is "
> +                     "configured improperly.\n");
> +     }
>  
>       return;
>  }
> @@ -1897,6 +1909,7 @@ static void memb_state_commit_enter (
>       reset_token_timeout (instance); // REVIEWED
>  
>       instance->stats.commit_entered++;
> +     instance->stats.continuous_gather = 0;
>  
>       /*
>        * reset all flow control variables since we are starting a new ring
> @@ -2093,6 +2106,8 @@ originated:
>  
>       instance->memb_state = MEMB_STATE_RECOVERY;
>       instance->stats.recovery_entered++;
> +     instance->stats.continuous_gather = 0;
> +
>       return;
>  }
>  
> diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
> index 4e2e475..cf78e4c 100644
> --- a/include/corosync/totem/totem.h
> +++ b/include/corosync/totem/totem.h
> @@ -52,6 +52,11 @@
>  #define SEND_THREADS_MAX     16
>  #define INTERFACE_MAX                2
>  
> +/*
> + * Maximum number of continuous gather states
> + */
> +#define MAX_NO_CONT_GATHER   3
> +
>  struct totem_interface {
>       struct totem_ip_address bindnet;
>       struct totem_ip_address boundto;
> @@ -250,6 +255,7 @@ typedef struct {
>       uint64_t recovery_token_lost;
>       uint64_t consensus_timeouts;
>       uint64_t rx_msg_dropped;
> +     uint32_t continuous_gather;
>  
>       int earliest_token;
>       int latest_token;

_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to