Under overload conditions with thousands of checkpoints and an exiting
process, it is possible that many simultaneous checkpoint expirations
will trigger an assertion failure.
This patch resolves that issue.
Regards
-steve
Index: exec/ckpt.c
===================================================================
--- exec/ckpt.c (revision 1641)
+++ exec/ckpt.c (working copy)
@@ -159,6 +159,7 @@
struct checkpoint {
struct list_head list;
+ struct list_head expiry_list;
mar_name_t name;
mar_uint32_t ckpt_id;
mar_ckpt_checkpoint_creation_attributes_t checkpoint_creation_attributes;
@@ -370,6 +371,8 @@
DECLARE_LIST_INIT(checkpoint_recovery_list_head);
+DECLARE_LIST_INIT(my_checkpoint_expiry_list_head);
+
static mar_uint32_t global_ckpt_id = 0;
static enum sync_state my_sync_state;
@@ -386,6 +389,10 @@
static unsigned int my_should_sync = 0;
+static unsigned int my_token_callback_active = 0;
+
+static void * my_token_callback_handle;
+
struct checkpoint_cleanup {
struct list_head list;
mar_name_t checkpoint_name;
@@ -1017,9 +1024,7 @@
iovec.iov_base = (char *)&req_exec_ckpt_checkpointclose;
iovec.iov_len = sizeof (req_exec_ckpt_checkpointclose);
- assert (totempg_groups_mcast_joined (openais_group_handle, &iovec, 1, TOTEMPG_AGREED) == 0);
-
- return (-1);
+ return (totempg_groups_mcast_joined (openais_group_handle, &iovec, 1, TOTEMPG_AGREED));
}
static int ckpt_exec_init_fn (struct objdb_iface_ver0 *objdb)
@@ -1261,6 +1266,7 @@
checkpoint->unlinked = 0;
list_init (&checkpoint->list);
list_init (&checkpoint->sections_list_head);
+ list_init (&checkpoint->expiry_list);
list_add (&checkpoint->list, &checkpoint_list_head);
checkpoint->reference_count = 1;
checkpoint->retention_timer = 0;
@@ -1473,31 +1479,69 @@
}
-void timer_function_retention (void *data)
+int callback_expiry (enum totem_callback_token_type type, void *data)
{
struct checkpoint *checkpoint = (struct checkpoint *)data;
- struct req_exec_ckpt_checkpointretentiondurationexpire req_exec_ckpt_checkpointretentiondurationexpire;
+ struct req_exec_ckpt_checkpointunlink req_exec_ckpt_checkpointunlink;
struct iovec iovec;
+ unsigned int res;
+ struct list_head *list;
- checkpoint->retention_timer = 0;
- req_exec_ckpt_checkpointretentiondurationexpire.header.size =
- sizeof (struct req_exec_ckpt_checkpointretentiondurationexpire);
- req_exec_ckpt_checkpointretentiondurationexpire.header.id =
- SERVICE_ID_MAKE (CKPT_SERVICE,
- MESSAGE_REQ_EXEC_CKPT_CHECKPOINTRETENTIONDURATIONEXPIRE);
+ list = my_checkpoint_expiry_list_head.next;
+ while (!list_empty(&my_checkpoint_expiry_list_head)) {
+ checkpoint = list_entry (list,
+ struct checkpoint, expiry_list);
- memcpy (&req_exec_ckpt_checkpointretentiondurationexpire.checkpoint_name,
- &checkpoint->name,
- sizeof (mar_name_t));
- req_exec_ckpt_checkpointretentiondurationexpire.ckpt_id =
- checkpoint->ckpt_id;
+ if (checkpoint->reference_count == 0) {
+ req_exec_ckpt_checkpointunlink.header.size =
+ sizeof (struct req_exec_ckpt_checkpointunlink);
+ req_exec_ckpt_checkpointunlink.header.id =
+ SERVICE_ID_MAKE (CKPT_SERVICE,
+ MESSAGE_REQ_EXEC_CKPT_CHECKPOINTUNLINK);
- iovec.iov_base = (char *)&req_exec_ckpt_checkpointretentiondurationexpire;
- iovec.iov_len = sizeof (req_exec_ckpt_checkpointretentiondurationexpire);
+ req_exec_ckpt_checkpointunlink.source.conn = 0;
+ req_exec_ckpt_checkpointunlink.source.nodeid = 0;
- assert (totempg_groups_mcast_joined (openais_group_handle, &iovec, 1, TOTEMPG_AGREED) == 0);
+ memcpy (&req_exec_ckpt_checkpointunlink.checkpoint_name,
+ &checkpoint->name,
+ sizeof (mar_name_t));
+
+ iovec.iov_base = (char *)&req_exec_ckpt_checkpointunlink;
+ iovec.iov_len = sizeof (req_exec_ckpt_checkpointunlink);
+
+ res = totempg_groups_mcast_joined (openais_group_handle, &iovec, 1, TOTEMPG_AGREED);
+ if (res == -1) {
+ return (-1);
+ }
+ log_printf (LOG_LEVEL_NOTICE,
+ "Expiring checkpoint %s\n",
+ get_mar_name_t (&checkpoint->name));
+ }
+
+ list_del (&checkpoint->expiry_list);
+ list = my_checkpoint_expiry_list_head.next;
+ }
+ my_token_callback_active = 0;
+ return (0);
}
+void timer_function_retention (void *data)
+{
+ struct checkpoint *checkpoint = (struct checkpoint *)data;
+ checkpoint->retention_timer = 0;
+ list_add (&checkpoint->expiry_list, &my_checkpoint_expiry_list_head);
+
+ if (my_token_callback_active == 0) {
+ totempg_callback_token_create (
+ &my_token_callback_handle,
+ TOTEM_CALLBACK_TOKEN_SENT,
+ 1,
+ callback_expiry,
+ NULL);
+ my_token_callback_active = 1;
+ }
+}
+
static void message_handler_req_exec_ckpt_checkpointclose (
void *message,
unsigned int nodeid)
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais