The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at 
https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit 5216fd4e1597fe5990502fe8d717210e3aebf363
Author: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
Date:   Thu Sep 30 17:43:57 2021 +0300

    fence-watchdog: Add fence-watchdog driver
    
    We need to prevent the system from running without a special userspace
    daemon, for the purposes of an HA cluster. So add this watchdog module,
    which will fence the node if that daemon does not update the timer
    value in the file /sys/kernel/watchdog_timer.
    The module is needed for pstorage: the network must be protected from
    a broken node, so the check is placed in net_rx_action.
    
    Signed-off-by: Dmitry Guryanov <dgurya...@parallels.com>
    Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
    
    Rebase: ktkhai@
    
    Putting fence_wdog_jiffies64 in same cacheline with jiffies will
    be in a separate patch: "fence-watchdog: link fence_wdog_jiffies64 and
    jiffies in one cacheline"
    
    Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
    
    (cherry-picked from vz8 commit aef6d38b398b ("fence-watchdog:
    Add fence-watchdog driver"))
    
    Updated use of timekeeping API since 32-bit timespec is no longer
    available.
    
    Applied minor formatting fixes.
    
    Added "CONFIG_FENCE_WATCHDOG=y" to
    redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
    
    Signed-off-by: Nikita Yushchenko <nikita.yushche...@virtuozzo.com>
---
 include/linux/fence-watchdog.h                     |  15 +
 kernel/Kconfig.openvz                              |   4 +
 kernel/Makefile                                    |   1 +
 kernel/fence-watchdog.c                            | 313 +++++++++++++++++++++
 net/core/dev.c                                     |  13 +
 .../custom-overrides/generic/CONFIG_FENCE_WATCHDOG |   1 +
 6 files changed, 347 insertions(+)

diff --git a/include/linux/fence-watchdog.h b/include/linux/fence-watchdog.h
new file mode 100644
index 000000000000..26b542a4080f
--- /dev/null
+++ b/include/linux/fence-watchdog.h
@@ -0,0 +1,15 @@
+/*
+ *  include/linux/fence-watchdog.h
+ *
+ *  Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ *  Copyright (c) 2017-2021 Virtuozzo International GmbH. All rights reserved.
+ *
+ */
+
+#ifndef _LINUX_FENCE_WATCHDOG_H_
+#define _LINUX_FENCE_WATCHDOG_H_
+
+#include <linux/types.h>
+
+/*
+ * Both functions are defined in kernel/fence-watchdog.c.
+ *
+ * fence_wdog_check_timer() returns 1 when the fencing deadline has passed
+ * and the configured action is not "netfilter" (triggering the fence action
+ * as needed), and 0 otherwise.
+ *
+ * fence_wdog_tmo_match() only reports whether the deadline has passed.
+ *
+ * The prototype carries no "inline": the body is not visible to includers,
+ * so an inline declaration here could never be inlined and only invites
+ * "declared but never defined" diagnostics.
+ */
+int fence_wdog_check_timer(void);
+bool fence_wdog_tmo_match(void);
+
+#endif
diff --git a/kernel/Kconfig.openvz b/kernel/Kconfig.openvz
index 6c3fbed8ae60..9489342596ab 100644
--- a/kernel/Kconfig.openvz
+++ b/kernel/Kconfig.openvz
@@ -60,4 +60,8 @@ config VZ_EVENT
          networking code does. By now just the notifications of
          the VE essensial status changes are being sent.
 
+config FENCE_WATCHDOG
+       bool "Fencing watchdog for HA cluster support"
+       depends on X86_64
+       default n
 endmenu
diff --git a/kernel/Makefile b/kernel/Makefile
index bf938a777629..6f59a21caa5b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
+obj-$(CONFIG_FENCE_WATCHDOG) += fence-watchdog.o
 
 obj-$(CONFIG_HAS_IOMEM) += iomem.o
 obj-$(CONFIG_RSEQ) += rseq.o
diff --git a/kernel/fence-watchdog.c b/kernel/fence-watchdog.c
new file mode 100644
index 000000000000..e7fe7d2f3804
--- /dev/null
+++ b/kernel/fence-watchdog.c
@@ -0,0 +1,313 @@
+/*
+ *  kernel/fence-watchdog.c
+ *
+ *  Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ *  Copyright (c) 2017-2021 Virtuozzo International GmbH. All rights reserved.
+ *
+ */
+
+/*
+ * Provide userspace with an interface to forbid the kernel to work
+ * without a userspace daemon.
+ *
+ * The daemon should write the number of seconds before fencing to the
+ * file /sys/kernel/watchdog_timer, and must renew it before the
+ * time elapses.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/jiffies.h>
+#include <linux/reboot.h>
+#include <linux/fence-watchdog.h>
+#include <linux/device.h>
+#include <linux/kmsg_dump.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+
+/* Deadline value used while the watchdog is disarmed (all bits set). */
+#define MAX_U64                        (~(u64)0)
+/* Largest accepted timeout in jiffies: ten 365-day years of seconds times HZ. */
+#define MAX_JIFFIES_DELTA      (10 * 365UL * 24UL * 3600UL * HZ)
+/* Size of the buffer an action name is parsed into, including the NUL. */
+#define ACTION_NAME_LEN                16
+
+/* Fence actions; each value is the index of its name in action_names[]. */
+enum {
+       FENCE_WDOG_CRASH = 0,
+       FENCE_WDOG_REBOOT = 1,
+       FENCE_WDOG_POWEROFF = 2,
+       FENCE_WDOG_NETFILTER = 3,
+};
+
+/* NULL-terminated list of action names as read and written through sysfs. */
+const char *action_names[] = {"crash", "reboot", "halt", "netfilter", NULL};
+
+/*
+ * Fencing deadline in jiffies; MAX_U64 means "disarmed".
+ * Declared unsigned long; Kconfig restricts the driver to X86_64.
+ */
+unsigned long volatile fence_wdog_jiffies64 = MAX_U64;
+/* Currently selected action, one of the FENCE_WDOG_* values above. */
+static int fence_wdog_action = FENCE_WDOG_CRASH;
+
+/* Escalation stages stored in fence_stage. */
+enum {
+       NOT_FENCED = 0,
+       FENCED = 1,
+       FENCED_TIMEOUT = 2,
+};
+
+/* Advanced with atomic_cmpxchg() in fence_wdog_check_timer(). */
+static atomic_t fence_stage = ATOMIC_INIT(NOT_FENCED);
+/* Log file written by fence_wdog_log(); settable via watchdog_log_path. */
+static char fence_wdog_log_path[PATH_MAX] = "/fence_wdog.log";
+
+#define SECS_PER_MIN   60
+/* Room for the formatted prefix (four-digit year) plus the terminating NUL. */
+#define PREFIX_LEN     39
+
+/*
+ * Write a "[hh:mm:ss/YYYY-MM-DD] fence-watchdog: " prefix into @msg.
+ *
+ * The timestamp is the current real time shifted by sys_tz.tz_minuteswest.
+ * @msg must provide at least PREFIX_LEN bytes; output is truncated to that
+ * size.
+ *
+ * Returns the snprintf() result.
+ */
+static int print_prefix(char *msg)
+{
+       struct timespec64 ts;
+       struct tm tm;
+
+       ktime_get_real_ts64(&ts);
+       time64_to_tm(ts.tv_sec - sys_tz.tz_minuteswest * SECS_PER_MIN, 0, &tm);
+
+       /* The format stays on one line: a string literal must not be wrapped. */
+       return snprintf(msg, PREFIX_LEN,
+                       "[%02d:%02d:%02d/%04ld-%02d-%02d] fence-watchdog: ",
+                       tm.tm_hour, tm.tm_min, tm.tm_sec,
+                       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
+}
+
+/* Prefix plus the longest action name ("netfilter") and a newline. */
+#define MSG_LEN (PREFIX_LEN + 10)
+
+/*
+ * Append a timestamped line naming the current fence action to the file at
+ * fence_wdog_log_path, then fsync it.
+ *
+ * Best effort: failures are reported with printk(KERN_EMERG) and otherwise
+ * ignored.  The target must be a regular file; it is created with mode 0600
+ * if missing and symlinks are not followed.
+ */
+void fence_wdog_log(void)
+{
+       const char *action = action_names[fence_wdog_action];
+       char msg[MSG_LEN];
+       struct file *file;
+       int ret, len;
+
+       ret = print_prefix(msg);
+       if (ret < 0)
+               return;
+
+       len = strlen(msg);
+
+       /*
+        * snprintf() returns the length the output would have had without
+        * truncation, so truncation shows up as ret >= the space available.
+        */
+       ret = snprintf(msg + len, MSG_LEN - len, "%s\n", action);
+       if (ret < 0 || ret >= MSG_LEN - len) {
+               printk(KERN_EMERG "fence-watchdog: Failed to sprintf msg\n");
+               return;
+       }
+
+       file = filp_open(fence_wdog_log_path,
+                        O_CREAT | O_WRONLY | O_APPEND | O_NOFOLLOW | O_LARGEFILE,
+                        0600);
+       if (IS_ERR(file)) {
+               printk(KERN_EMERG "fence-watchdog: Failed to open log path\n");
+               return;
+       }
+
+       if (!S_ISREG(file_inode(file)->i_mode)) {
+               printk(KERN_EMERG "fence-watchdog: Wrong type of log file\n");
+               goto close;
+       }
+
+       ret = kernel_write(file, msg, strlen(msg), &file->f_pos);
+       if (ret < 0) {
+               printk(KERN_EMERG "fence-watchdog: Failed to write msg, ret=%d\n", ret);
+               goto close;
+       }
+
+       ret = vfs_fsync(file, 0);
+       if (ret < 0)
+               printk(KERN_EMERG "fence-watchdog: Failed to fsync log file ret=%d\n", ret);
+
+close:
+       ret = filp_close(file, NULL);
+       if (ret < 0)
+               printk(KERN_EMERG "fence-watchdog: Failed to close log file ret=%d\n", ret);
+}
+
+/*
+ * Work handler behind halt_or_reboot_work: announce the configured action
+ * with a KERN_EMERG printk, record it via fence_wdog_log(), then carry it
+ * out.
+ *
+ * Only FENCE_WDOG_REBOOT and FENCE_WDOG_POWEROFF are acted upon here; for
+ * any other action value the switch does nothing.  @dummy is unused.
+ */
+static void do_halt_or_reboot(struct work_struct *dummy)
+{
+       printk(KERN_EMERG "fence-watchdog: %s\n",
+              action_names[fence_wdog_action]);
+
+       /* Log before acting: nothing runs after the restart or halt. */
+       fence_wdog_log();
+
+       switch (fence_wdog_action) {
+       case FENCE_WDOG_REBOOT:
+               emergency_restart();
+               break;
+       case FENCE_WDOG_POWEROFF:
+               kernel_halt();
+               break;
+       }
+}
+
+/* Work item scheduled by fence_wdog_do_fence(). */
+static DECLARE_WORK(halt_or_reboot_work, do_halt_or_reboot);
+
+/*
+ * Carry out the fence.
+ *
+ * Panics right away when the action is "crash" or when fence_stage has
+ * already reached FENCED_TIMEOUT; in every other case the work item
+ * halt_or_reboot_work is scheduled instead.
+ */
+void fence_wdog_do_fence(void)
+{
+       if (fence_wdog_action != FENCE_WDOG_CRASH &&
+           atomic_read(&fence_stage) != FENCED_TIMEOUT) {
+               schedule_work(&halt_or_reboot_work);
+               return;
+       }
+
+       panic("fence-watchdog: %s\n", action_names[fence_wdog_action]);
+}
+
+/* Seconds past the deadline after which a still-running node is escalated. */
+#define FENCE_WDOG_TIMEOUT 30
+
+/*
+ * Check the fencing deadline and fence the node if it has passed.
+ *
+ * Nothing happens while the deadline lies in the future or the action is
+ * "netfilter".  Otherwise fence_wdog_do_fence() is called on the
+ * NOT_FENCED -> FENCED transition, and once more on the FENCED ->
+ * FENCED_TIMEOUT transition when the node is still running
+ * FENCE_WDOG_TIMEOUT seconds past the deadline.  Each transition is
+ * claimed with atomic_cmpxchg(), so only one caller acts on it.
+ *
+ * The deadline is sampled once so that a concurrent write to
+ * watchdog_timer cannot change it between the two comparisons.
+ *
+ * Returns 1 when the deadline has passed and the action is not
+ * "netfilter", 0 otherwise.
+ */
+inline int fence_wdog_check_timer(void)
+{
+       u64 deadline = fence_wdog_jiffies64;
+       u64 now = get_jiffies_64();
+
+       if (likely(now <= deadline ||
+                  fence_wdog_action == FENCE_WDOG_NETFILTER))
+               return 0;
+
+       if (atomic_cmpxchg(&fence_stage, NOT_FENCED, FENCED) == NOT_FENCED ||
+           (now > deadline + FENCE_WDOG_TIMEOUT * HZ &&
+            atomic_cmpxchg(&fence_stage, FENCED, FENCED_TIMEOUT) == FENCED))
+               fence_wdog_do_fence();
+
+       return 1;
+}
+
+/*
+ * Report whether the fencing deadline has passed, without triggering any
+ * fence action.  Exported with EXPORT_SYMBOL.
+ */
+bool fence_wdog_tmo_match(void)
+{
+       const u64 now = get_jiffies_64();
+
+       return now > fence_wdog_jiffies64;
+}
+EXPORT_SYMBOL(fence_wdog_tmo_match);
+
+/*
+ * sysfs show handler for watchdog_timer.
+ *
+ * Prints the whole seconds left until the deadline, or "inf" when the
+ * remaining time exceeds MAX_JIFFIES_DELTA.  The difference is computed
+ * unsigned, so a disarmed timer (deadline MAX_U64) and a deadline that is
+ * already in the past both yield a huge value and are shown as "inf".
+ */
+static ssize_t fence_wdog_timer_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       u64 remaining = fence_wdog_jiffies64 - get_jiffies_64();
+       struct timespec64 left;
+
+       if (remaining > MAX_JIFFIES_DELTA)
+               return sprintf(buf, "inf\n");
+
+       jiffies_to_timespec64(remaining, &left);
+       return sprintf(buf, "%lld\n", left.tv_sec);
+}
+
+/*
+ * sysfs store handler for watchdog_timer.
+ *
+ * Accepts a decimal number of seconds.  Zero disarms the watchdog by
+ * setting the deadline to MAX_U64; any other value moves the deadline that
+ * many seconds past the current jiffies count.  Input that does not parse,
+ * or that converts to more than MAX_JIFFIES_DELTA jiffies, is rejected
+ * with -EINVAL.
+ *
+ * Returns @count on success.
+ */
+static ssize_t fence_wdog_timer_store(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf, size_t count)
+{
+       struct timespec64 timeout;
+       unsigned long long secs;
+       unsigned long delta;
+
+       if (kstrtoull(buf, 10, &secs) != 0)
+               return -EINVAL;
+
+       if (!secs) {
+               fence_wdog_jiffies64 = MAX_U64;
+               return count;
+       }
+
+       timeout.tv_sec = secs;
+       timeout.tv_nsec = 0;
+       delta = timespec64_to_jiffies(&timeout);
+
+       if (delta > MAX_JIFFIES_DELTA)
+               return -EINVAL;
+
+       fence_wdog_jiffies64 = get_jiffies_64() + delta;
+       return count;
+}
+
+/* sysfs show handler for watchdog_action: prints the current action name. */
+static ssize_t fence_wdog_action_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       const char *name = action_names[fence_wdog_action];
+
+       return sprintf(buf, "%s\n", name);
+}
+
+/*
+ * sysfs store handler for watchdog_action.
+ *
+ * Takes the first whitespace-delimited word of @buf and compares it,
+ * ignoring case, against each entry of action_names[].  On a match the
+ * entry's index becomes the current action.
+ *
+ * Returns @count on success, -EINVAL when no word could be read or the
+ * word matches no known action.
+ */
+static ssize_t fence_wdog_action_store(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf, size_t count)
+{
+       char str_action[ACTION_NAME_LEN];
+       int i;
+
+       /* %15s leaves room for the NUL in the ACTION_NAME_LEN-byte buffer. */
+       if (sscanf(buf, "%15s", str_action) != 1)
+               return -EINVAL;
+
+       for (i = 0; action_names[i]; i++) {
+               if (!strncasecmp(str_action, action_names[i],
+                                ACTION_NAME_LEN)) {
+                       fence_wdog_action = i;
+                       return count;
+               }
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * sysfs show handler for watchdog_available_actions: prints every entry of
+ * action_names[], each followed by a space, and ends the line with '\n'.
+ */
+static ssize_t fence_wdog_available_actions_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       const char **name;
+       int len = 0;
+
+       for (name = action_names; *name; name++)
+               len += sprintf(buf + len, "%s ", *name);
+
+       len += sprintf(buf + len, "\n");
+       return len;
+}
+
+/* sysfs show handler for watchdog_log_path: prints the current log path. */
+static ssize_t fence_wdog_log_path_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       int len = snprintf(buf, PAGE_SIZE, "%s\n", fence_wdog_log_path);
+
+       return len;
+}
+
+/* Size of the buffer holding the "%<PATH_MAX - 1>s" format built below. */
+#define STORE_FORMAT_LEN 16
+
+/*
+ * sysfs store handler for watchdog_log_path.
+ *
+ * Reads the first whitespace-delimited word of @buf, limited to
+ * PATH_MAX - 1 characters, straight into fence_wdog_log_path.
+ *
+ * Returns @count on success so the write is reported as fully consumed
+ * (returning 0 makes userspace see a zero-length write, which writers
+ * commonly retry), or -EINVAL when no word could be parsed.
+ */
+static ssize_t fence_wdog_log_path_store(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf, size_t count)
+{
+       char format[STORE_FORMAT_LEN];
+       int ret;
+
+       /* Build the width-limited conversion at run time from PATH_MAX. */
+       ret = snprintf(format, STORE_FORMAT_LEN, "%%%ds", PATH_MAX - 1);
+       if (ret < 0)
+               return ret;
+
+       if (sscanf(buf, format, fence_wdog_log_path) != 1)
+               return -EINVAL;
+
+       return count;
+}
+
+/* /sys/kernel/watchdog_timer: seconds until fencing, read/write. */
+static struct kobj_attribute fence_wdog_timer_attr =
+       __ATTR(watchdog_timer, 0644,
+               fence_wdog_timer_show, fence_wdog_timer_store);
+
+/* /sys/kernel/watchdog_action: name of the fence action, read/write. */
+static struct kobj_attribute fence_wdog_action_attr =
+       __ATTR(watchdog_action, 0644,
+               fence_wdog_action_show, fence_wdog_action_store);
+
+/*
+ * /sys/kernel/watchdog_available_actions: list of action names.
+ * There is no store handler, so the file mode is read-only.
+ */
+static struct kobj_attribute fence_wdog_available_actions_attr =
+       __ATTR(watchdog_available_actions, 0444,
+               fence_wdog_available_actions_show, NULL);
+
+/* /sys/kernel/watchdog_log_path: path of the fence log file, read/write. */
+static struct kobj_attribute fence_wdog_log_path_attr =
+       __ATTR(watchdog_log_path, 0644,
+               fence_wdog_log_path_show, fence_wdog_log_path_store);
+
+/* NULL-terminated attribute list registered by fence_wdog_init(). */
+static struct attribute *fence_wdog_attrs[] = {
+       &fence_wdog_timer_attr.attr,
+       &fence_wdog_action_attr.attr,
+       &fence_wdog_available_actions_attr.attr,
+       &fence_wdog_log_path_attr.attr,
+       NULL,
+};
+
+/* Unnamed group: the files are placed directly in the kobject's directory. */
+static const struct attribute_group fence_wdog_attr_group = {
+       .attrs = fence_wdog_attrs,
+};
+
+/*
+ * Add the watchdog attribute files to kernel_kobj (/sys/kernel).
+ *
+ * Returns 0 on success or the error reported by sysfs_update_group(), so a
+ * failure to create the control files is not silently ignored.
+ */
+static int __init fence_wdog_init(void)
+{
+       return sysfs_update_group(kernel_kobj, &fence_wdog_attr_group);
+}
+
+module_init(fence_wdog_init);
diff --git a/net/core/dev.c b/net/core/dev.c
index 3500c9544d27..21b0e5ff5eaf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -151,6 +151,7 @@
 #include <linux/prandom.h>
 #include <linux/once_lite.h>
 #include <linux/ve.h>
+#include <linux/fence-watchdog.h>
 
 #include "net-sysfs.h"
 
@@ -3669,6 +3670,14 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
        struct sk_buff *skb = first;
        int rc = NETDEV_TX_OK;
 
+#ifdef CONFIG_FENCE_WATCHDOG
+       if (unlikely(fence_wdog_check_timer())) {
+               kfree_skb(skb);
+               *ret = rc;
+               return NULL;
+       }
+#endif
+
        while (skb) {
                struct sk_buff *next = skb->next;
 
@@ -7189,6 +7198,10 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
        list_splice_init(&sd->poll_list, &list);
        local_irq_enable();
 
+#ifdef CONFIG_FENCE_WATCHDOG
+       fence_wdog_check_timer();
+#endif
+
        for (;;) {
                struct napi_struct *n;
 
diff --git a/redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG b/redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
new file mode 100644
index 000000000000..434aac2b336a
--- /dev/null
+++ b/redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
@@ -0,0 +1 @@
+CONFIG_FENCE_WATCHDOG=y
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to