Package: pacemaker
Version: 1.1.10+git20130802-4
Severity: important
Tags: patch
User: [email protected]
Usertags: origin-ubuntu utopic ubuntu-patch

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512

In Ubuntu, the attached patch was applied to achieve the following:

[Impact]

 * When a user runs "crm node standby", lrmd may keep trying to monitor
   resources that were put into standby, which produces error messages.

[Test Case]

 * Run "crm node standby" and check that lrmd stops monitoring the
   resources put into standby and keeps monitoring those that were not.

[Regression Potential]

 * Users have already tested the fix and are running it in production.
 * Based on upstream fixes for lrmd monitoring.
 * Potential race conditions (judging by upstream history).

[Changelog]

  * Fix: services: Do not allow duplicate recurring op entries - 1/3 (LP: #1353473)
  * High: lrmd: Merge duplicate recurring monitor operations - 2/3 (LP: #1353473)
  * Fix: lrmd: Cancel recurring operations before stop action is executed - 3/3 (LP: #1353473)

[Fix]

I have not been able to reproduce this error so far, but the fix is a
straightforward cherry-pick of the following upstream patch set:

48f90f6 Fix: services: Do not allow duplicate recurring op entries
c29ab27 High: lrmd: Merge duplicate recurring monitor operations
348bb51 Fix: lrmd: Cancel recurring operations before stop action is executed

Thanks for considering the patch.

- -- System Information:
Debian Release: jessie/sid
  APT prefers trusty-updates
  APT policy: (500, 'trusty-updates'), (500, 'trusty')
Architecture: amd64 (x86_64)

Kernel: Linux 3.13.0-32-generic (SMP w/4 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8) (ignored: LC_ALL 
set to en_US.UTF-8)
Shell: /bin/sh linked to /bin/dash

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4

iQEcBAEBCgAGBQJT5SyrAAoJEAynk4KHaD/AOEoH/jfBOykuLjBvGCTeRdpTfW+n
NgGLZ9DzaBpIQ7/xJr7URBOtdcW7yzHXxoRXjPe5WiWcckEG+PVh8cu8WIeqw5my
bbYcArDjDmlMNOK92P14YSsghWC/PsORB7xvYtjVX5xEll1d29iDBI54t4TsjwKN
RhPxWWfjkImFGE8mgYY3JwlftbRKG/nMHoKiYGD/RW9BgqTWCay4VClVw0i5C+OA
zmbMvvjw5oxmJ2fiu2IENI5Htnjep/lP05kZTFeEp1Qde1svV3W+miiNLtsRalwV
r6Jz3NAcM4vU8eja80wH3SPGZD6dynJey/hIalvY/oTe3u6tdqV2TG1ML2Q4RjM=
=ucl+
-----END PGP SIGNATURE-----
diff -Nru pacemaker-1.1.10+git20130802/debian/changelog pacemaker-1.1.10+git20130802/debian/changelog
diff -Nru pacemaker-1.1.10+git20130802/debian/control pacemaker-1.1.10+git20130802/debian/control
--- pacemaker-1.1.10+git20130802/debian/control	2014-08-01 11:27:39.000000000 -0300
+++ pacemaker-1.1.10+git20130802/debian/control	2014-08-08 16:59:08.000000000 -0300
@@ -1,8 +1,7 @@
 Source: pacemaker
 Section: admin
 Priority: optional
-Maintainer: Ubuntu Developers <[email protected]>
-XSBC-Original-Maintainer: Debian HA Maintainers <[email protected]>
+Maintainer: Debian HA Maintainers <[email protected]>
 Uploaders: Martin Loschwitz <[email protected]>, Anibal Monsalve Salazar <[email protected]>, Simon Horman <[email protected]>, Frederik Schüler <[email protected]>
 Build-Depends: debhelper (>= 7.0.50), automake, autoconf, chrpath, libsnmp-dev, libglib2.0-dev, perl, net-tools, python (>= 2.6.6-3~), libtool, libcurl4-openssl-dev | libcurl3-openssl-dev, libxml2-dev, bison, flex, uuid-dev, libbz2-dev, zlib1g-dev, libltdl3-dev, openssh-client, libgnutls-dev, libpam0g-dev, libncurses5-dev, libcorosync-dev (>= 1.4.4-1), libheartbeat2-dev (>= 3.0), libxslt1-dev, libesmtp-dev, cluster-glue-dev (>= 1.0.11+hg2754), liblrm2-dev, libpils2-dev, libplumb2-dev, libplumbgpl2-dev, libstonith1-dev, help2man, libxml2-utils, xsltproc, docbook-xsl, resource-agents-dev, libqb-dev (>= 0.16.0.real-1), hardening-wrapper, libcfg-dev
 Standards-Version: 3.9.3
diff -Nru pacemaker-1.1.10+git20130802/debian/patches/Fix-lrmd-Cancel-recurring-operations-before-stop-act.patch pacemaker-1.1.10+git20130802/debian/patches/Fix-lrmd-Cancel-recurring-operations-before-stop-act.patch
--- pacemaker-1.1.10+git20130802/debian/patches/Fix-lrmd-Cancel-recurring-operations-before-stop-act.patch	1969-12-31 21:00:00.000000000 -0300
+++ pacemaker-1.1.10+git20130802/debian/patches/Fix-lrmd-Cancel-recurring-operations-before-stop-act.patch	2014-08-08 16:09:40.000000000 -0300
@@ -0,0 +1,295 @@
+Description: [PATCH 3/3] Fix: lrmd: Cancel recurring operations before stop action is executed
+
+Origin: upstream, commit: 348bb51
+Author: David Vossel <[email protected]>
+Last-Updated: 2014-08-08
+Bug-Ubuntu: https://bugs.launchpad.net/bugs/1353473
+
+---
+ include/crm/common/mainloop.h |   1 +
+ lib/common/mainloop.c         | 167 ++++++++++++++++++++++++++++--------------
+ lib/services/services.c       |   9 ++-
+ lrmd/lrmd.c                   |  10 +--
+ 4 files changed, 127 insertions(+), 60 deletions(-)
+
+diff --git a/include/crm/common/mainloop.h b/include/crm/common/mainloop.h
+index 0941f1b..baee2ee 100644
+--- a/include/crm/common/mainloop.h
++++ b/include/crm/common/mainloop.h
+@@ -93,6 +93,7 @@ const char *mainloop_child_name(mainloop_child_t * child);
+ 
+ pid_t mainloop_child_pid(mainloop_child_t * child);
+ void mainloop_clear_child_userdata(mainloop_child_t * child);
++gboolean mainloop_child_kill(pid_t pid);
+ 
+ #  define G_PRIORITY_MEDIUM (G_PRIORITY_HIGH/2)
+ 
+diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c
+index d678584..a77e90c 100644
+--- a/lib/common/mainloop.c
++++ b/lib/common/mainloop.c
+@@ -855,10 +855,35 @@ mainloop_clear_child_userdata(mainloop_child_t * child)
+     child->privatedata = NULL;
+ }
+ 
++/* good function name */
++static void
++child_free(mainloop_child_t *child)
++{
++    if (child->timerid != 0) {
++        crm_trace("Removing timer %d", child->timerid);
++        g_source_remove(child->timerid);
++        child->timerid = 0;
++    }
++    free(child->desc);
++    free(child);
++}
++
++/* terrible function name */
++static int
++child_kill_helper(mainloop_child_t *child)
++{
++    if (kill(child->pid, SIGKILL) < 0) {
++        crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid);
++        return -errno;
++    }
++    return 0;
++}
++
+ static gboolean
+ child_timeout_callback(gpointer p)
+ {
+     mainloop_child_t *child = p;
++    int rc = 0;
+ 
+     child->timerid = 0;
+     if (child->timeout) {
+@@ -866,88 +891,122 @@ child_timeout_callback(gpointer p)
+         return FALSE;
+     }
+ 
++    rc = child_kill_helper(child);
++    if (rc == ESRCH) {
++        /* Nothing left to do. pid doesn't exist */
++        return FALSE;
++    }
++
+     child->timeout = TRUE;
+     crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid);
+ 
+-    if (kill(child->pid, SIGKILL) < 0) {
+-        if (errno == ESRCH) {
+-            /* Nothing left to do */
+-            return FALSE;
+-        }
+-        crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid);
+-    }
+-
+     child->timerid = g_timeout_add(5000, child_timeout_callback, child);
+     return FALSE;
+ }
+ 
+ static GListPtr child_list = NULL;
+ 
+-static void
+-child_death_dispatch(int signal)
++static gboolean
++child_waitpid(mainloop_child_t *child, int flags)
+ {
+-    GListPtr iter = child_list;
+-
+-    while(iter) {
+-        int rc = 0;
+-        int core = 0;
+-        int signo = 0;
+-        int status = 0;
+-        int exitcode = 0;
+-
+-        GListPtr saved = NULL;
+-        mainloop_child_t *child = iter->data;
++    int rc = 0;
++    int core = 0;
++    int signo = 0;
++    int status = 0;
++    int exitcode = 0;
+ 
+-        rc = waitpid(child->pid, &status, WNOHANG);
+-        if(rc == 0) {
+-            iter = iter->next;
+-            continue;
++    rc = waitpid(child->pid, &status, flags);
++    if(rc == 0) {
++        return FALSE;
+ 
+-        } else if(rc != child->pid) {
+-            signo = signal;
+-            exitcode = 1;
+-            status = 1;
+-            crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid);
++    } else if(rc != child->pid) {
++        signo = SIGCHLD;
++        exitcode = 1;
++        status = 1;
++        crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid);
+ 
+-        } else {
+-            crm_trace("Managed process %d exited: %p", child->pid, child);
++    } else {
++        crm_trace("Managed process %d exited: %p", child->pid, child);
+ 
+-            if (WIFEXITED(status)) {
+-                exitcode = WEXITSTATUS(status);
+-                crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode);
++        if (WIFEXITED(status)) {
++            exitcode = WEXITSTATUS(status);
++            crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode);
+ 
+-            } else if (WIFSIGNALED(status)) {
+-                signo = WTERMSIG(status);
+-                crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo);
+-            }
++        } else if (WIFSIGNALED(status)) {
++            signo = WTERMSIG(status);
++            crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo);
++        }
+ #ifdef WCOREDUMP
+-            if (WCOREDUMP(status)) {
+-                core = 1;
+-                crm_err("Managed process %d (%s) dumped core", child->pid, child->desc);
+-            }
+-#endif
++        if (WCOREDUMP(status)) {
++            core = 1;
++            crm_err("Managed process %d (%s) dumped core", child->pid, child->desc);
+         }
++#endif
++    }
+ 
+-        if (child->callback) {
+-            child->callback(child, child->pid, core, signo, exitcode);
+-        }
++    if (child->callback) {
++        child->callback(child, child->pid, core, signo, exitcode);
++    }
++    return TRUE;
++}
+ 
+-        crm_trace("Removing process entry %p for %d", child, child->pid);
++static void
++child_death_dispatch(int signal)
++{
++    GListPtr iter = child_list;
++    gboolean exited;
++
++    while(iter) {
++        GListPtr saved = NULL;
++        mainloop_child_t *child = iter->data;
++        exited = child_waitpid(child, WNOHANG);
+ 
+         saved = iter;
+         iter = iter->next;
+ 
++        if (exited == FALSE) {
++            continue;
++        }
++        crm_trace("Removing process entry %p for %d", child, child->pid);
++
+         child_list = g_list_remove_link(child_list, saved);
+         g_list_free(saved);
++        child_free(child);
++    }
++}
+ 
+-        if (child->timerid != 0) {
+-            crm_trace("Removing timer %d", child->timerid);
+-            g_source_remove(child->timerid);
+-            child->timerid = 0;
++gboolean
++mainloop_child_kill(pid_t pid)
++{
++    GListPtr iter;
++    mainloop_child_t *child = NULL;
++
++    for (iter = child_list; iter != NULL; iter = iter->next) {
++        child = iter->data;
++        if (pid == child->pid) {
++            break;
+         }
+-        free(child->desc);
+-        free(child);
+     }
++
++    if (child == NULL) {
++        return FALSE;
++    }
++
++    if (child_kill_helper(child) != 0) {
++        /* failed to terminate child process */
++        return FALSE;
++    }
++
++    /* It is impossible to block SIGKILL, this allows us to
++     * call waitpid without WNOHANG here */
++    if (child_waitpid(child, 0) == FALSE) {
++        /* not much we can do if this occurs */
++        return FALSE;
++    }
++
++    child_list = g_list_remove(child_list, child);
++    child_free(child);
++    return TRUE;
+ }
+ 
+ /* Create/Log a new tracked process
+diff --git a/lib/services/services.c b/lib/services/services.c
+index a9a7fd4..ce97bd5 100644
+--- a/lib/services/services.c
++++ b/lib/services/services.c
+@@ -382,8 +382,15 @@ services_action_cancel(const char *name, const char *action, int interval)
+         }
+         services_action_free(op);
+     } else {
+-        crm_info("Cancelling op: %s will occur once operation completes", id);
++        int rc;
++        crm_info("Cancelling in-flight op: performing early termination of %s", id);
+         op->cancel = 1;
++        rc = mainloop_child_kill(op->pid);
++        if (rc != 0 ) {
++            /* even though the early termination failed,
++             * the op will be marked as cancelled once it completes. */
++            crm_err("Termination of %s failed", id);
++        }
+     }
+ 
+     return TRUE;
+diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
+index a3a00ab..9edc749 100644
+--- a/lrmd/lrmd.c
++++ b/lrmd/lrmd.c
+@@ -314,6 +314,11 @@ schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+         return;
+     }
+ 
++    /* crmd expects lrmd to automatically cancel recurring ops before rsc stops. */
++    if (rsc && safe_str_eq(cmd->action, "stop")) {
++        cancel_all_recurring(rsc, NULL);
++    }
++
+     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
+ #ifdef HAVE_SYS_TIMEB_H
+     ftime(&cmd->t_queue);
+@@ -502,11 +507,6 @@ cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
+ 
+     send_cmd_complete_notify(cmd);
+ 
+-    /* crmd expects lrmd to automatically cancel recurring ops after rsc stops */
+-    if (rsc && safe_str_eq(cmd->action, "stop")) {
+-        cancel_all_recurring(rsc, NULL);
+-    }
+-
+     if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
+         if (rsc) {
+             rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
+-- 
+1.9.1
+
diff -Nru pacemaker-1.1.10+git20130802/debian/patches/Fix-services-Do-not-allow-duplicate-recurring-op-ent.patch pacemaker-1.1.10+git20130802/debian/patches/Fix-services-Do-not-allow-duplicate-recurring-op-ent.patch
--- pacemaker-1.1.10+git20130802/debian/patches/Fix-services-Do-not-allow-duplicate-recurring-op-ent.patch	1969-12-31 21:00:00.000000000 -0300
+++ pacemaker-1.1.10+git20130802/debian/patches/Fix-services-Do-not-allow-duplicate-recurring-op-ent.patch	2014-08-08 16:09:40.000000000 -0300
@@ -0,0 +1,94 @@
+Description: [PATCH 1/3] Fix: services: Do not allow duplicate recurring op entries
+
+Duplicate recurring operations silently replace each
+other in a way that makes the original entry impossible
+to cancel.  This can cause unexpected monitor failures to
+occur after resources were thought to have been stopped.
+
+Origin: upstream, commit: 48f90f6
+Author: David Vossel <[email protected]>
+Last-Updated: 2014-08-08
+Bug-Ubuntu: https://bugs.launchpad.net/bugs/1353473
+
+---
+ lib/services/services.c | 49 +++++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 45 insertions(+), 4 deletions(-)
+
+diff --git a/lib/services/services.c b/lib/services/services.c
+index adfc508..373736e 100644
+--- a/lib/services/services.c
++++ b/lib/services/services.c
+@@ -304,6 +304,9 @@ services_action_free(svc_action_t * op)
+         return;
+     }
+ 
++    if (op->opaque->repeat_timer) {
++        g_source_remove(op->opaque->repeat_timer);
++    }
+     if (op->opaque->stderr_gsource) {
+         mainloop_del_fd(op->opaque->stderr_gsource);
+         op->opaque->stderr_gsource = NULL;
+@@ -386,6 +389,44 @@ services_action_cancel(const char *name, const char *action, int interval)
+     return TRUE;
+ }
+ 
++/* add new recurring operation, check for duplicates. 
++ * - if duplicate found, return TRUE, immediately reschedule op.
++ * - if no dup, return FALSE, inserve into recurring op list.*/
++static gboolean
++handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_action_t *))
++{
++    svc_action_t * dup = NULL;
++
++    if (recurring_actions == NULL) {
++        recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL);
++        return FALSE;
++    }
++
++    /* check for duplicates */
++    dup = g_hash_table_lookup(recurring_actions, op->id);
++
++    if (dup && (dup != op)) {
++        /* update user data */
++        if (op->opaque->callback) {
++            dup->opaque->callback = op->opaque->callback;
++            dup->cb_data = op->cb_data;
++            op->cb_data = NULL;
++        }
++        /* immediately execute the next interval */
++        if (dup->pid != 0) {
++            if (op->opaque->repeat_timer) {
++                g_source_remove(op->opaque->repeat_timer);
++            }
++            recurring_action_timer(dup);
++        }
++        /* free the dup.  */
++        services_action_free(op);
++        return TRUE;
++    }
++
++    return FALSE;
++}
++
+ gboolean
+ services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *))
+ {
+@@ -393,11 +434,11 @@ services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *
+         op->opaque->callback = action_callback;
+     }
+ 
+-    if (recurring_actions == NULL) {
+-        recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL);
+-    }
+-
+     if (op->interval > 0) {
++        if (handle_duplicate_recurring(op, action_callback) == TRUE) {
++            /* entry rescheduled, dup freed */
++            return TRUE;
++        }
+         g_hash_table_replace(recurring_actions, op->id, op);
+     }
+ #if SUPPORT_UPSTART
+-- 
+1.9.1
+
diff -Nru pacemaker-1.1.10+git20130802/debian/patches/High-lrmd-Merge-duplicate-recurring-monitor-operatio.patch pacemaker-1.1.10+git20130802/debian/patches/High-lrmd-Merge-duplicate-recurring-monitor-operatio.patch
--- pacemaker-1.1.10+git20130802/debian/patches/High-lrmd-Merge-duplicate-recurring-monitor-operatio.patch	1969-12-31 21:00:00.000000000 -0300
+++ pacemaker-1.1.10+git20130802/debian/patches/High-lrmd-Merge-duplicate-recurring-monitor-operatio.patch	2014-08-08 16:09:43.000000000 -0300
@@ -0,0 +1,230 @@
+Description: [PATCH 2/3] High: lrmd: Merge duplicate recurring monitor operations
+
+Never allow two instances of the same recurring monitor operation
+to exist in the lrmd.
+
+Conflicts:
+	include/crm/services.h
+
+* This conflict was due to different functions declared in services.h.
+* resources_find_service_class() does not exist in 1.1.10.
+
+Origin: upstream, commit: c29ab27
+Author: David Vossel <[email protected]>
+Last-Updated: 2014-08-08
+Bug-Ubuntu: https://bugs.launchpad.net/bugs/1353473
+Signed-off-by: Rafael David Tinoco <[email protected]>
+
+---
+ include/crm/services.h  |  5 +++
+ lib/services/services.c | 29 +++++++++++++++
+ lrmd/lrmd.c             | 93 ++++++++++++++++++++++++++++++++++++++++++-------
+ 3 files changed, 114 insertions(+), 13 deletions(-)
+
+diff --git a/include/crm/services.h b/include/crm/services.h
+index fb5c6b0..6c34782 100644
+--- a/include/crm/services.h
++++ b/include/crm/services.h
+@@ -231,6 +231,11 @@ enum nagios_exitcode {
+                                           int timeout /* ms */ , GHashTable * params);
+ 
+ /**
++ * Kick a recurring action so it is scheduled immediately for re-execution
++ */
++    gboolean services_action_kick(const char *name, const char *action, int interval /* ms */);
++
++/**
+  * Utilize services API to execute an arbitrary command.
+  *
+  * This API has useful infrastructure in place to be able to run a command
+diff --git a/lib/services/services.c b/lib/services/services.c
+index 373736e..a9a7fd4 100644
+--- a/lib/services/services.c
++++ b/lib/services/services.c
+@@ -389,6 +389,35 @@ services_action_cancel(const char *name, const char *action, int interval)
+     return TRUE;
+ }
+ 
++gboolean
++services_action_kick(const char *name, const char *action, int interval /* ms */)
++{
++    svc_action_t * op = NULL;
++    char *id = NULL;
++
++    if (asprintf(&id, "%s_%s_%d", name, action, interval) == -1) {
++        return FALSE;
++    }
++
++    op = g_hash_table_lookup(recurring_actions, id);
++    free(id);
++
++    if (op == NULL) {
++        return FALSE;
++    }
++
++    if (op->pid) {
++        return TRUE;
++    } else {
++        if (op->opaque->repeat_timer) {
++            g_source_remove(op->opaque->repeat_timer);
++        }
++        recurring_action_timer(op);
++        return TRUE;
++    }
++
++}
++
+ /* add new recurring operation, check for duplicates. 
+  * - if duplicate found, return TRUE, immediately reschedule op.
+  * - if no dup, return FALSE, inserve into recurring op list.*/
+diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
+index a4747cb..a3a00ab 100644
+--- a/lrmd/lrmd.c
++++ b/lrmd/lrmd.c
+@@ -122,6 +122,17 @@ log_execute(lrmd_cmd_t * cmd)
+                cmd->rsc_id, cmd->action, cmd->call_id);
+ }
+ 
++static const char *
++normalize_action_name(lrmd_rsc_t * rsc, const char *action)
++{
++    if (safe_str_eq(action, "monitor") &&
++        (safe_str_eq(rsc->class, "lsb") ||
++         safe_str_eq(rsc->class, "service") || safe_str_eq(rsc->class, "systemd"))) {
++        return "status";
++    }
++    return action;
++}
++
+ static lrmd_rsc_t *
+ build_rsc_from_xml(xmlNode * msg)
+ {
+@@ -233,13 +244,76 @@ start_delay_helper(gpointer data)
+     return FALSE;
+ }
+ 
++static gboolean
++merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
++{
++    GListPtr gIter = NULL;
++    lrmd_cmd_t * dup = NULL;
++    gboolean dup_pending = FALSE;
++
++    if (cmd->interval == 0) {
++        return 0;
++    }
++
++    for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
++        dup = gIter->data;
++        if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) {
++            dup_pending = TRUE;
++            goto merge_dup;
++        }
++    }
++
++    /* if dup is in recurring_ops list, that means it has already executed
++     * and is in the interval loop. we can't just remove it in this case. */
++    for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
++        dup = gIter->data;
++        if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) {
++            goto merge_dup;
++        }
++    }
++
++    return FALSE;
++merge_dup:
++
++    /* merge */
++    dup->first_notify_sent = 0;
++    free(dup->userdata_str);
++    dup->userdata_str = cmd->userdata_str;
++    cmd->userdata_str = NULL;
++    dup->call_id = cmd->call_id;
++
++    if (safe_str_eq(rsc->class, "stonith")) {
++        /* if we are waiting for the next interval, kick it off now */
++        if (dup_pending == TRUE) {
++            g_source_remove(cmd->stonith_recurring_id);
++            cmd->stonith_recurring_id = 0;
++            stonith_recurring_op_helper(cmd);
++        }
++
++    } else if (dup_pending == FALSE) {
++        /* if we've already handed this to the service lib, kick off an early execution */
++        services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval);
++    }
++    free_lrmd_cmd(cmd);
++
++    return TRUE;
++}
++
+ static void
+ schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+ {
++    gboolean dup_processed = FALSE;
+     CRM_CHECK(cmd != NULL, return);
+     CRM_CHECK(rsc != NULL, return);
+ 
+     crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
++
++    dup_processed = merge_recurring_duplicate(rsc, cmd);
++    if (dup_processed) {
++        /* duplicate recurring cmd found, cmds merged */
++        return;
++    }
++
+     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
+ #ifdef HAVE_SYS_TIMEB_H
+     ftime(&cmd->t_queue);
+@@ -249,7 +323,6 @@ schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+     if (cmd->start_delay) {
+         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
+     }
+-
+ }
+ 
+ static void
+@@ -809,17 +882,6 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+     return rc;
+ }
+ 
+-static const char *
+-normalize_action_name(lrmd_rsc_t * rsc, const char *action)
+-{
+-    if (safe_str_eq(action, "monitor") &&
+-        (safe_str_eq(rsc->class, "lsb") ||
+-         safe_str_eq(rsc->class, "service") || safe_str_eq(rsc->class, "systemd"))) {
+-        return "status";
+-    }
+-    return action;
+-}
+-
+ static void
+ dup_attr(gpointer key, gpointer value, gpointer user_data)
+ {
+@@ -1129,6 +1191,7 @@ process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request)
+     lrmd_cmd_t *cmd = NULL;
+     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
+     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
++    int call_id;
+ 
+     if (!rsc_id) {
+         return -EINVAL;
+@@ -1140,9 +1203,13 @@ process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request)
+     }
+ 
+     cmd = create_lrmd_cmd(request, client);
++    call_id = cmd->call_id;
++
++    /* Don't reference cmd after handing it off to be scheduled.
++     * The cmd could get merged and freed. */
+     schedule_lrmd_cmd(rsc, cmd);
+ 
+-    return cmd->call_id;
++    return call_id;
+ }
+ 
+ static int
+-- 
+1.9.1
+
diff -Nru pacemaker-1.1.10+git20130802/debian/patches/series pacemaker-1.1.10+git20130802/debian/patches/series
--- pacemaker-1.1.10+git20130802/debian/patches/series	2013-08-06 08:02:49.000000000 -0300
+++ pacemaker-1.1.10+git20130802/debian/patches/series	2014-08-08 16:10:00.000000000 -0300
@@ -5,3 +5,6 @@
 cli_stop_after_assertion_failure.patch
 gracefully_handle_ECHILD_in_waitpid.patch
 fix_crm_mon_host_list.patch
+Fix-services-Do-not-allow-duplicate-recurring-op-ent.patch
+High-lrmd-Merge-duplicate-recurring-monitor-operatio.patch
+Fix-lrmd-Cancel-recurring-operations-before-stop-act.patch

Reply via email to