Hello community,

here is the log from the commit of package sbd for openSUSE:Factory checked in 
at 2019-08-05 10:41:15
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/sbd (Old)
 and      /work/SRC/openSUSE:Factory/.sbd.new.4126 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "sbd"

Mon Aug  5 10:41:15 2019 rev:30 rq:720707 version:1.4.0+20190612.398628b

Changes:
--------
--- /work/SRC/openSUSE:Factory/sbd/sbd.changes  2019-07-08 15:10:43.275327599 
+0200
+++ /work/SRC/openSUSE:Factory/.sbd.new.4126/sbd.changes        2019-08-05 
10:41:44.775296318 +0200
@@ -1,0 +2,14 @@
+Tue Jul 02 15:26:03 UTC 2019 - Yan Gao <y...@suse.com>
+
+- Update to version 1.4.0+20190612.398628b:
+- sbd-cluster: periodically check corosync-daemon liveness
+- sbd-pacemaker: assume graceful exit if leftovers are unmanaged
+- sbd-common: query rt-budget > 0 otherwise try moving to root-slice (bsc#1143064)
+
+-------------------------------------------------------------------
+Tue Jul  2 14:38:08 UTC 2019 - jtomas...@suse.com
+
+- sbd-cluster: Fix 100% CPU usage when CMAP connection is lost (bsc#1140065, SOC-8774)
+  * bsc#1140065-Fix-sbd-cluster-exit-if-cmap-is-disconnected.patch
+
+-------------------------------------------------------------------

Old:
----
  sbd-1.4.0+20190514.e9be8d9.tar.xz

New:
----
  bsc#1140065-Fix-sbd-cluster-exit-if-cmap-is-disconnected.patch
  sbd-1.4.0+20190612.398628b.tar.xz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ sbd.spec ++++++
--- /var/tmp/diff_new_pack.ONzdG8/_old  2019-08-05 10:41:45.183296271 +0200
+++ /var/tmp/diff_new_pack.ONzdG8/_new  2019-08-05 10:41:45.183296271 +0200
@@ -23,13 +23,14 @@
 %endif
 
 Name:           sbd
-Version:        1.4.0+20190514.e9be8d9
+Version:        1.4.0+20190612.398628b
 Release:        0
 Summary:        Storage-based death
 License:        GPL-2.0-or-later
 Group:          Productivity/Clustering/HA
 Url:            https://github.com/ClusterLabs/sbd
 Source:         %{name}-%{version}.tar.xz
+Patch1:         bsc#1140065-Fix-sbd-cluster-exit-if-cmap-is-disconnected.patch
 BuildRequires:  autoconf
 BuildRequires:  automake
 BuildRequires:  e2fsprogs-devel

++++++ _servicedata ++++++
--- /var/tmp/diff_new_pack.ONzdG8/_old  2019-08-05 10:41:45.211296267 +0200
+++ /var/tmp/diff_new_pack.ONzdG8/_new  2019-08-05 10:41:45.215296267 +0200
@@ -1,6 +1,6 @@
 <servicedata>
   <service name="tar_scm">
     <param name="url">https://github.com/ClusterLabs/sbd.git</param>
-    <param 
name="changesrevision">13d85f249b34e17eccf5ecb7527134c65fcf25a0</param>
+    <param 
name="changesrevision">398628bbcc896be4da21d273392f2fe6391319e2</param>
   </service>
 </servicedata>
\ No newline at end of file

++++++ bsc#1140065-Fix-sbd-cluster-exit-if-cmap-is-disconnected.patch ++++++
diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c
index 541212f..b315e8b 100644
--- a/src/sbd-cluster.c
+++ b/src/sbd-cluster.c
@@ -35,6 +35,18 @@
 
 #if CHECK_TWO_NODE
 #include <glib-unix.h>
+// available since glib 2.58
+#ifndef G_SOURCE_FUNC
+#define G_SOURCE_FUNC(f) ((GSourceFunc) (void (*)(void)) (f))
+#endif
+// available since glib 2.32
+#ifndef G_SOURCE_REMOVE
+#define G_SOURCE_REMOVE FALSE
+#endif
+// available since glib 2.32
+#ifndef G_SOURCE_CONTINUE
+#define G_SOURCE_CONTINUE TRUE
+#endif
 #endif
 
 #include "sbd.h"
@@ -55,6 +67,7 @@ static int reconnect_msec = 1000;
 static GMainLoop *mainloop = NULL;
 static guint notify_timer = 0;
 static crm_cluster_t cluster;
+static void clean_up(int rc);
 static gboolean sbd_remote_check(gpointer user_data);
 static long unsigned int find_pacemaker_remote(void);
 static void sbd_membership_destroy(gpointer user_data);
@@ -168,10 +181,19 @@ static void sbd_cmap_notify_fn(
 }
 
 static gboolean
-cmap_dispatch_callback (gpointer user_data)
+cmap_dispatch_callback (gint cmap_fd,
+                        GIOCondition condition,
+                        gpointer user_data)
 {
+    /* CMAP connection lost */
+    if (condition & G_IO_HUP) {
+        cl_log(LOG_WARNING, "CMAP service connection lost\n");
+        clean_up(EXIT_CLUSTER_DISCONNECT);
+        /* remove the source from the main loop */
+        return G_SOURCE_REMOVE; /* never reached */
+    }
     cmap_dispatch(cmap_handle, CS_DISPATCH_ALL);
-    return TRUE;
+    return G_SOURCE_CONTINUE;
 }
 
 static void
@@ -222,7 +244,7 @@ sbd_get_two_node(void)
             cl_log(LOG_WARNING, "Couldn't create source for cmap\n");
             goto out;
         }
-        g_source_set_callback(cmap_source, cmap_dispatch_callback, NULL, NULL);
+        g_source_set_callback(cmap_source, 
G_SOURCE_FUNC(cmap_dispatch_callback), NULL, NULL);
         g_source_attach(cmap_source, NULL);
     }
 
@@ -533,6 +555,15 @@ find_pacemaker_remote(void)
                             * just to be back where we started
                             */
 #endif
+
+#if SUPPORT_COROSYNC && CHECK_TWO_NODE
+    cmap_destroy();
+#endif
+
+    if (rc >= 0) {
+        exit(rc);
+    }
+
     return;
 }
 
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index abde4e5..fcb867c 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -526,6 +526,20 @@ void inquisitor_child(void)
                                                                        break;
                                                        }
                                                }
+                                       } else if (sbd_is_cluster(s)) {
+                                               if (WIFEXITED(status)) {
+                                                       
switch(WEXITSTATUS(status)) {
+                                                               case 
EXIT_CLUSTER_DISCONNECT:
+                                                                       
cl_log(LOG_WARNING, "Cluster-Servant has exited (connection lost)");
+                                                                       
s->restarts = 0;
+                                                                       
s->restart_blocked = 0;
+                                                                       
s->outdated = 1;
+                                                                       
s->t_last.tv_sec = 0;
+                                                                       break;
+                                                               default:
+                                                                       break;
+                                                       }
+                                               }
                                        }
                                        cleanup_servant_by_pid(pid);
                                }
diff --git a/src/sbd.h b/src/sbd.h
index 3b05a11..45244ab 100644
--- a/src/sbd.h
+++ b/src/sbd.h
@@ -62,6 +62,9 @@
 /* exit status for pcmk-servant */
 #define EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN 30
 
+/* exit status for cluster-servant */
+#define EXIT_CLUSTER_DISCONNECT 40
+
 #define HOG_CHAR       0xff
 #define SECTOR_NAME_MAX 63
 
++++++ sbd-1.4.0+20190514.e9be8d9.tar.xz -> sbd-1.4.0+20190612.398628b.tar.xz 
++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/sbd-1.4.0+20190514.e9be8d9/configure.ac 
new/sbd-1.4.0+20190612.398628b/configure.ac
--- old/sbd-1.4.0+20190514.e9be8d9/configure.ac 2019-05-14 14:00:48.000000000 
+0200
+++ new/sbd-1.4.0+20190612.398628b/configure.ac 2019-06-12 18:36:24.000000000 
+0200
@@ -33,6 +33,7 @@
 dnl PKG_CHECK_MODULES(libcoroipcc, [libcoroipcc])
 
 PKG_CHECK_MODULES(cmap, [libcmap], HAVE_cmap=1, HAVE_cmap=0)
+PKG_CHECK_MODULES(votequorum, [libvotequorum], HAVE_votequorum=1, 
HAVE_votequorum=0)
 
 dnl pacemaker > 1.1.8
 PKG_CHECK_MODULES(pacemaker, [pacemaker, pacemaker-cib], HAVE_pacemaker=1, 
HAVE_pacemaker=0)
@@ -47,9 +48,14 @@
 elif test $HAVE_pacemaker = 1; then
     CPPFLAGS="$CPPFLAGS $glib_CFLAGS $pacemaker_CFLAGS"
     if test $HAVE_cmap = 0; then
-        AC_MSG_NOTICE(No package 'cmap' found)
+        AC_MSG_NOTICE(No library 'cmap' found)
     else
-       CPPFLAGS="$CPPFLAGS $cmap_CFLAGS"
+        CPPFLAGS="$CPPFLAGS $cmap_CFLAGS"
+    fi
+    if test $HAVE_votequorum = 0; then
+        AC_MSG_NOTICE(No library 'votequorum' found)
+    else
+        CPPFLAGS="$CPPFLAGS $votequorum_CFLAGS"
     fi
 fi
 
@@ -66,6 +72,7 @@
 AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes")
 AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes")
 AC_CHECK_LIB(cmap, cmap_initialize, , HAVE_cmap=0)
+AC_CHECK_LIB(votequorum, votequorum_getinfo, , HAVE_votequorum=0)
 
 dnl pacemaker >= 1.1.8
 AC_CHECK_HEADERS(pacemaker/crm/cluster.h)
@@ -107,6 +114,9 @@
 AC_DEFINE_UNQUOTED(CHECK_TWO_NODE, $HAVE_cmap, Turn on checking for 2-node 
cluster)
 AM_CONDITIONAL(CHECK_TWO_NODE, test "$HAVE_cmap" = "1")
 
+AC_DEFINE_UNQUOTED(CHECK_VOTEQUORUM_HANDLE, $HAVE_votequorum, Turn on periodic 
checking of votequorum-handle)
+AM_CONDITIONAL(CHECK_VOTEQUORUM_HANDLE, test "$HAVE_votequorum" = "1")
+
 CONFIGDIR=""
 AC_ARG_WITH(configdir,
     [  --with-configdir=DIR
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/sbd-1.4.0+20190514.e9be8d9/src/sbd-cluster.c 
new/sbd-1.4.0+20190612.398628b/src/sbd-cluster.c
--- old/sbd-1.4.0+20190514.e9be8d9/src/sbd-cluster.c    2019-05-14 
14:00:48.000000000 +0200
+++ new/sbd-1.4.0+20190612.398628b/src/sbd-cluster.c    2019-06-12 
18:36:24.000000000 +0200
@@ -80,6 +80,12 @@
 
 #if SUPPORT_COROSYNC
 
+#if CHECK_VOTEQUORUM_HANDLE
+#include <corosync/votequorum.h>
+
+static votequorum_handle_t votequorum_handle = 0;
+#endif
+
 static bool two_node = false;
 static bool ever_seen_both = false;
 static int cpg_membership_entries = -1;
@@ -261,12 +267,32 @@
 
 #endif
         case pcmk_cluster_corosync:
+            do {
+#if SUPPORT_COROSYNC && CHECK_VOTEQUORUM_HANDLE
+                struct votequorum_info info;
+
+                if (votequorum_getinfo(votequorum_handle, 0, &info) != CS_OK) {
+
+                    votequorum_finalize(votequorum_handle);
+                    if (votequorum_initialize(&votequorum_handle, NULL) != 
CS_OK) {
+                        votequorum_handle = 0;
+                        break;
+                    }
+                    if (votequorum_getinfo(votequorum_handle, 0, &info) != 
CS_OK) {
+                        break;
+                    }
+                }
+#endif
+                notify_parent();
+            } while (0);
+            break;
+
 #if HAVE_DECL_PCMK_CLUSTER_CMAN
         case pcmk_cluster_cman:
-#endif
-            /* TODO - Make a CPG call and only call notify_parent() when we 
get a reply */
+
             notify_parent();
             break;
+#endif
 
         default:
             break;
@@ -533,6 +559,12 @@
 static void
 clean_up(int rc)
 {
+#if CHECK_VOTEQUORUM_HANDLE
+    votequorum_finalize(votequorum_handle);
+    votequorum_handle = 0; /* there isn't really an invalid handle value
+                            * just to be back where we started
+                            */
+#endif
     return;
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/sbd-1.4.0+20190514.e9be8d9/src/sbd-common.c 
new/sbd-1.4.0+20190612.398628b/src/sbd-common.c
--- old/sbd-1.4.0+20190514.e9be8d9/src/sbd-common.c     2019-05-14 
14:00:48.000000000 +0200
+++ new/sbd-1.4.0+20190612.398628b/src/sbd-common.c     2019-06-12 
18:36:24.000000000 +0200
@@ -662,6 +662,112 @@
 #endif
 }
 
+static int get_realtime_budget(void)
+{
+    FILE *f;
+    char fname[PATH_MAX];
+    int res = -1, lnum = 0;
+    char *cgroup = NULL, *namespecs = NULL;
+
+    snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid());
+    f = fopen(fname, "rt");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd",
+                            (intmax_t)getpid());
+        goto exit_res;
+    }
+    while( fscanf(f, "%d:%m[^:]:%m[^\n]", &lnum,  &namespecs, &cgroup) !=EOF ) 
{
+        if (namespecs && strstr(namespecs, "cpuacct")) {
+            free(namespecs);
+            break;
+        }
+        if (cgroup) {
+            free(cgroup);
+            cgroup = NULL;
+        }
+        if (namespecs) {
+            free(namespecs);
+            namespecs = NULL;
+        }
+    }
+    fclose(f);
+    if (cgroup == NULL) {
+        cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd",
+                            (intmax_t)getpid());
+        goto exit_res;
+    }
+    snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us",
+                              cgroup);
+    f = fopen(fname, "rt");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but "
+            "doesn't for '%s'", cgroup);
+        goto exit_res;
+    }
+    if (fscanf(f, "%d", &res) != 1) {
+        cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname);
+    } else {
+        cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res);
+    }
+    fclose(f);
+
+exit_res:
+    if (cgroup) {
+        free(cgroup);
+    }
+    return res;
+}
+
+/* stolen from corosync */
+static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
+    FILE *f;
+    int res = -1;
+
+    /*
+     * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
+     * using systemd and systemd uses hardcoded path of cgroup mount point.
+     *
+     * This feature is expected to be removed as soon as systemd gets support
+     * for managing RT configuration.
+     */
+    f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
+    if (f == NULL) {
+        cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> "
+            "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
+        res = 0;
+        goto exit_res;
+    }
+    fclose(f);
+
+    if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) {
+        cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are "
+                          "-> skip moving to root-slice");
+        res = 0;
+        goto exit_res;
+    }
+
+    f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing");
+
+        goto exit_res;
+    }
+
+    if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) {
+        cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file");
+        goto close_and_exit_res;
+    }
+
+close_and_exit_res:
+    if (fclose(f) != 0) {
+        cl_log(LOG_WARNING, "Can't close cgroups tasks file");
+        goto exit_res;
+    }
+
+exit_res:
+    return (res);
+}
+
 void
 sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
 {
@@ -670,6 +776,10 @@
     }
 
 #ifdef SCHED_RR
+    if (move_to_root_cgroup) {
+        sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup);
+    }
+
     {
         int pcurrent = 0;
         int pmin = sched_get_priority_min(SCHED_RR);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/sbd-1.4.0+20190514.e9be8d9/src/sbd-inquisitor.c 
new/sbd-1.4.0+20190612.398628b/src/sbd-inquisitor.c
--- old/sbd-1.4.0+20190514.e9be8d9/src/sbd-inquisitor.c 2019-05-14 
14:00:48.000000000 +0200
+++ new/sbd-1.4.0+20190612.398628b/src/sbd-inquisitor.c 2019-06-12 
18:36:24.000000000 +0200
@@ -33,6 +33,8 @@
 char*  pidfile = NULL;
 bool do_flush = true;
 char timeout_sysrq_char = 'b';
+bool move_to_root_cgroup = true;
+bool enforce_moving_to_root_cgroup = false;
 
 int parse_device_line(const char *line);
 
@@ -965,6 +967,19 @@
             timeout_action = strdup(value);
         }
 
+        value = getenv("SBD_MOVE_TO_ROOT_CGROUP");
+        if(value) {
+            move_to_root_cgroup = crm_is_true(value);
+
+            if (move_to_root_cgroup) {
+               enforce_moving_to_root_cgroup = true;
+            } else {
+                if (strcmp(value, "auto") == 0) {
+                    move_to_root_cgroup = true;
+                }
+            }
+        }
+
        while ((c = getopt(argc, argv, 
"czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) {
                switch (c) {
                case 'D':
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/sbd-1.4.0+20190514.e9be8d9/src/sbd-pacemaker.c 
new/sbd-1.4.0+20190612.398628b/src/sbd-pacemaker.c
--- old/sbd-1.4.0+20190514.e9be8d9/src/sbd-pacemaker.c  2019-05-14 
14:00:48.000000000 +0200
+++ new/sbd-1.4.0+20190612.398628b/src/sbd-pacemaker.c  2019-06-12 
18:36:24.000000000 +0200
@@ -333,11 +333,41 @@
         }
     }
 
+    /* If we are in shutdown-state once this will go on till the end.
+     * If we've on top reached a state of 0 locally running resources
+     * we can assume a clean shutdown.
+     * Tricky are the situations where the node is in maintenance-mode
+     * or resources are unmanaged. So if the node is in maintenance or
+     * all left-over running resources are unmanaged we assume intention.
+     */
     if (node->details->shutdown) {
         pcmk_shutdown = 1;
     }
-    if (pcmk_shutdown && !(node->details->running_rsc)) {
+    if (pcmk_shutdown)
+    {
         pcmk_clean_shutdown = 1;
+        if (!(node->details->maintenance)) {
+            GListPtr iter;
+
+            for (iter = node->details->running_rsc;
+                 iter != NULL; iter = iter->next) {
+                resource_t *rsc = (resource_t *) iter->data;
+
+
+                if (is_set(rsc->flags, pe_rsc_managed)) {
+                    pcmk_clean_shutdown = 0;
+                    crm_debug("not clean as %s managed and still running",
+                              rsc->id);
+                    break;
+                }
+            }
+            if (pcmk_clean_shutdown) {
+                crm_debug("pcmk_clean_shutdown because "
+                          "all managed resources down");
+            }
+        } else {
+            crm_debug("pcmk_clean_shutdown because node is in maintenance");
+        }
     }
     notify_parent();
     return;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/sbd-1.4.0+20190514.e9be8d9/src/sbd.h 
new/sbd-1.4.0+20190612.398628b/src/sbd.h
--- old/sbd-1.4.0+20190514.e9be8d9/src/sbd.h    2019-05-14 14:00:48.000000000 
+0200
+++ new/sbd-1.4.0+20190612.398628b/src/sbd.h    2019-06-12 18:36:24.000000000 
+0200
@@ -159,6 +159,8 @@
 extern char*  local_uname;
 extern bool do_flush;
 extern char timeout_sysrq_char;
+extern bool move_to_root_cgroup;
+extern bool enforce_moving_to_root_cgroup;
 
 /* Global, non-tunable variables: */
 extern int  sector_size;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/sbd-1.4.0+20190514.e9be8d9/src/sbd.sysconfig 
new/sbd-1.4.0+20190612.398628b/src/sbd.sysconfig
--- old/sbd-1.4.0+20190514.e9be8d9/src/sbd.sysconfig    2019-05-14 
14:00:48.000000000 +0200
+++ new/sbd-1.4.0+20190612.398628b/src/sbd.sysconfig    2019-06-12 
18:36:24.000000000 +0200
@@ -91,6 +91,20 @@
 #
 SBD_TIMEOUT_ACTION=flush,reboot
 
+## Type: yesno / auto
+## Default: auto
+#
+# If CPUAccounting is enabled, the default is not to assign any RT-budget
+# to the system.slice, which prevents sbd from running RR-scheduled.
+#
+# One way to escape that issue is to move the sbd processes from the
+# slice they were originally started in to the root-slice.
+# Of course, starting sbd in a certain slice might be intentional.
+# Thus, in auto-mode sbd will check if the slice has RT-budget assigned.
+# If that is the case, sbd will stay in that slice; otherwise it will
+# be moved to the root-slice.
+SBD_MOVE_TO_ROOT_CGROUP=auto
+
 ## Type: string
 ## Default: ""
 #


Reply via email to