From: Darrick J. Wong <[email protected]>

Use Kent Overstreet's thread_with_file abstraction to provide a magic
file from which we can read filesystem health events.

Signed-off-by: Darrick J. Wong <[email protected]>
---
 fs/xfs/Kconfig                 |    9 +++
 fs/xfs/Makefile                |    1 
 fs/xfs/libxfs/xfs_fs.h         |    1 
 fs/xfs/libxfs/xfs_fs_staging.h |   10 +++
 fs/xfs/xfs_healthmon.c         |  129 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_healthmon.h         |   15 +++++
 fs/xfs/xfs_ioctl.c             |   21 +++++++
 fs/xfs/xfs_linux.h             |    3 +
 8 files changed, 189 insertions(+)
 create mode 100644 fs/xfs/xfs_healthmon.c
 create mode 100644 fs/xfs/xfs_healthmon.h


diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index e0fa9b382fbeb..dd22cf799328a 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -6,6 +6,7 @@ config XFS_FS
        select LIBCRC32C
        select FS_IOMAP
        select TIME_STATS if XFS_TIME_STATS
+       select THREAD_WITH_FILE if XFS_HEALTH_MONITOR
        help
          XFS is a high performance journaling filesystem which originated
          on the SGI IRIX platform.  It is completely multi-threaded, can
@@ -128,6 +129,14 @@ config XFS_TIME_STATS
        help
          Collects time statistics on various operations in the filesystem.
 
+config XFS_HEALTH_MONITOR
+       bool "Report filesystem health events to userspace"
+       depends on XFS_FS
+       select XFS_LIVE_HOOKS
+       default y
+       help
+         Report health events to userspace programs.
+
 config XFS_DRAIN_INTENTS
        bool
        select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index bf3bacfb7afff..563936e48ab39 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -154,6 +154,7 @@ xfs-$(CONFIG_XFS_LIVE_HOOKS)        += xfs_hooks.o
 xfs-$(CONFIG_XFS_MEMORY_BUFS)  += xfs_buf_mem.o
 xfs-$(CONFIG_XFS_BTREE_IN_MEM) += libxfs/xfs_btree_mem.o
 xfs-$(CONFIG_XFS_TIME_STATS)   += xfs_timestats.o
+xfs-$(CONFIG_XFS_HEALTH_MONITOR) += xfs_healthmon.o
 
 # online scrub/repair
 ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 246c2582abbe5..b9d9bc511475d 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -855,6 +855,7 @@ struct xfs_scrub_metadata {
 #define XFS_IOC_FSGETXATTRA    _IOR ('X', 45, struct fsxattr)
 /*     XFS_IOC_SETBIOSIZE ---- deprecated 46      */
 /*     XFS_IOC_GETBIOSIZE ---- deprecated 47      */
+/*     XFS_IOC_HEALTH_MONITOR --- staging 48      */
 #define XFS_IOC_GETBMAPX       _IOWR('X', 56, struct getbmap)
 #define XFS_IOC_ZERO_RANGE     _IOW ('X', 57, struct xfs_flock64)
 #define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
diff --git a/fs/xfs/libxfs/xfs_fs_staging.h b/fs/xfs/libxfs/xfs_fs_staging.h
index 1da182c77934d..84b99816eec2e 100644
--- a/fs/xfs/libxfs/xfs_fs_staging.h
+++ b/fs/xfs/libxfs/xfs_fs_staging.h
@@ -303,4 +303,14 @@ struct xfs_map_freesp {
  */
 #define XFS_IOC_MAP_FREESP     _IOWR('X', 64, struct xfs_map_freesp)
 
+struct xfs_health_monitor {
+       __u64   flags;          /* flags; only zero is accepted */
+       __u8    format;         /* output format; only zero is accepted */
+       __u8    pad1[7];        /* zeroes */
+       __u64   pad2[2];        /* zeroes */
+};
+
+/* Monitor for health events. */
+#define XFS_IOC_HEALTH_MONITOR         _IOR ('X', 48, struct xfs_health_monitor)
+
 #endif /* __XFS_FS_STAGING_H__ */
diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c
new file mode 100644
index 0000000000000..9b4da8d1e5173
--- /dev/null
+++ b/fs/xfs/xfs_healthmon.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+#include "xfs_health.h"
+#include "xfs_ag.h"
+#include "xfs_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_quota_defs.h"
+#include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
+
+/*
+ * Live Health Monitoring
+ * ======================
+ *
+ * Autonomous self-healing of XFS filesystems requires a means for the kernel
+ * to send filesystem health events to a monitoring daemon in userspace.  To
+ * accomplish this, we establish a thread_with_file kthread object to handle
+ * translating internal events about filesystem health into a format that can
+ * be parsed easily by userspace.  Then we hook various parts of the filesystem
+ * to supply those internal events to the kthread.  Userspace reads events
+ * from the file descriptor returned by the ioctl.
+ *
+ * The healthmon abstraction has a weak reference to the host filesystem mount
+ * so that the queueing and processing of the events do not pin the mount and
+ * cannot slow down the main filesystem.  The healthmon object can exist past
+ * the end of the filesystem mount.
+ */
+
+struct xfs_healthmon {
+       /* thread with stdio redirection */
+       struct thread_with_stdio        thread;
+};
+
+static inline struct xfs_healthmon *
+to_healthmon(struct thread_with_stdio  *thr)
+{
+       return container_of(thr, struct xfs_healthmon, thread);
+}
+
+/* Free the health monitor object and release its module reference. */
+STATIC void
+xfs_healthmon_exit(
+       struct thread_with_stdio        *thr)
+{
+       struct xfs_healthmon            *hm = to_healthmon(thr);
+
+       kfree(hm);
+       module_put(THIS_MODULE);
+}
+
+/* Pipe health monitoring information to userspace.  Empty for now. */
+STATIC void
+xfs_healthmon_run(
+       struct thread_with_stdio        *thr)
+{
+}
+
+/* Validate ioctl parameters; every field must currently be zero. */
+static inline bool
+xfs_healthmon_validate(
+       const struct xfs_health_monitor *hmo)
+{
+       if (hmo->flags)
+               return false;
+       if (hmo->format)
+               return false;
+       if (memchr_inv(&hmo->pad1, 0, sizeof(hmo->pad1)))
+               return false;
+       if (memchr_inv(&hmo->pad2, 0, sizeof(hmo->pad2)))
+               return false;
+       return true;
+}
+
+static const struct thread_with_stdio_ops xfs_healthmon_ops = {
+       .exit           = xfs_healthmon_exit,
+       .fn             = xfs_healthmon_run,
+};
+
+/*
+ * Create a health monitoring file.  All @hmo fields must be zero and the
+ * caller needs CAP_SYS_ADMIN.  Returns an fd table index or a negative errno.
+ */
+int
+xfs_healthmon_create(
+       struct xfs_mount                *mp,
+       struct xfs_health_monitor       *hmo)
+{
+       struct xfs_healthmon            *hm;
+       int                             ret;
+
+       if (!xfs_healthmon_validate(hmo))
+               return -EINVAL;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (!try_module_get(THIS_MODULE))
+               return -ENOMEM;
+
+       hm = kzalloc(sizeof(*hm), GFP_KERNEL);
+       if (!hm) {
+               ret = -ENOMEM;
+               goto out_mod;
+       }
+
+       ret = run_thread_with_stdout(&hm->thread, &xfs_healthmon_ops);
+       if (ret < 0)
+               goto out_hm;
+
+       return ret;
+out_hm:
+       kfree(hm);
+out_mod:
+       module_put(THIS_MODULE);
+       return ret;
+}
diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h
new file mode 100644
index 0000000000000..a9a8115ec770b
--- /dev/null
+++ b/fs/xfs/xfs_healthmon.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#ifndef __XFS_HEALTHMON_H__
+#define __XFS_HEALTHMON_H__
+
+#ifdef CONFIG_XFS_HEALTH_MONITOR
+int xfs_healthmon_create(struct xfs_mount *mp, struct xfs_health_monitor *hmo);
+#else
+# define xfs_healthmon_create(mp, hmo)         (-EOPNOTSUPP)
+#endif /* CONFIG_XFS_HEALTH_MONITOR */
+
+#endif /* __XFS_HEALTHMON_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d592ceb26c3e5..270127300ba02 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -44,6 +44,7 @@
 #include "xfs_file.h"
 #include "xfs_exchrange.h"
 #include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
 
 #include <linux/mount.h>
 #include <linux/namei.h>
@@ -2429,6 +2430,23 @@ xfs_ioc_map_freesp(
 # define xfs_ioc_map_freesp(...)               (-ENOTTY)
 #endif
 
+#ifdef CONFIG_XFS_EXPERIMENTAL_IOCTLS
+STATIC int
+xfs_ioc_health_monitor(
+       struct xfs_mount                *mp,
+       struct xfs_health_monitor __user *arg)
+{
+       struct xfs_health_monitor       hmo;
+
+       if (copy_from_user(&hmo, arg, sizeof(hmo)))
+               return -EFAULT;
+
+       return xfs_healthmon_create(mp, &hmo);
+}
+#else
+# define xfs_ioc_health_monitor(...)           (-ENOTTY)
+#endif
+
 /*
  * These long-unused ioctls were removed from the official ioctl API in 5.17,
  * but retain these definitions so that we can log warnings about them.
@@ -2685,6 +2703,9 @@ xfs_file_ioctl(
        case XFS_IOC_MAP_FREESP:
                return xfs_ioc_map_freesp(filp, arg);
 
+       case XFS_IOC_HEALTH_MONITOR:
+               return xfs_ioc_health_monitor(mp, arg);
+
        default:
                return -ENOTTY;
        }
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 8598294514aa3..02dc0aba4e728 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -69,6 +69,9 @@ typedef __u32                 xfs_nlink_t;
 # include <linux/time_stats.h>
 #endif
 #include <linux/sched/clock.h>
+#ifdef CONFIG_XFS_HEALTH_MONITOR
+# include <linux/thread_with_file.h>
+#endif
 
 #include <asm/page.h>
 #include <asm/div64.h>


Reply via email to