Add a new command, 'set-alert-config', which configures a device's warning alerts.

Signed-off-by: Jehoon Park <[email protected]>
---
 Documentation/cxl/cxl-set-alert-config.txt |  96 +++++++++
 Documentation/cxl/meson.build              |   1 +
 cxl/builtin.h                              |   1 +
 cxl/cxl.c                                  |   1 +
 cxl/memdev.c                               | 220 ++++++++++++++++++++-
 5 files changed, 318 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/cxl/cxl-set-alert-config.txt

diff --git a/Documentation/cxl/cxl-set-alert-config.txt b/Documentation/cxl/cxl-set-alert-config.txt
new file mode 100644
index 0000000..c905f7c
--- /dev/null
+++ b/Documentation/cxl/cxl-set-alert-config.txt
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+
+cxl-set-alert-config(1)
+=======================
+
+NAME
+----
+cxl-set-alert-config - set the warning alert threshold on a CXL memdev
+
+SYNOPSIS
+--------
+[verse]
+'cxl set-alert-config <mem0> [<mem1>..<memN>] [<options>]'
+
+DESCRIPTION
+-----------
+A CXL device raises an alert when its health status changes. Critical alerts
+are automatically configured by the device after a device reset.
+If supported, programmable warning thresholds are also initialized to vendor
+recommended defaults, and can then be configured by the user.
+
+Use this command to configure warning alert thresholds of a device.
+Once this command has been issued, the newly requested warning thresholds
+override the previously programmed warning thresholds.
+
+To enable a warning alert, set both 'threshold=value' and 'alert=on'. To disable
+a warning alert, set only 'alert=off'. Any other combination is an error.
+
+Use "cxl list -m <memdev> -A" to examine the programmable warning threshold
+capabilities of a device.
+
+EXAMPLES
+--------
+Set warning threshold to 30 and enable alert for life used.
+[verse]
+cxl set-alert-config mem0 -L 30 --life-used-alert=on
+
+Disable warning alert for device over temperature.
+[verse]
+cxl set-alert-config mem0 --over-temperature-alert=off
+
+OPTIONS
+-------
+<memory device(s)>::
+include::memdev-option.txt[]
+
+-v::
+--verbose::
+        Turn on verbose debug messages in the library (if libcxl was built with
+        logging and debug enabled).
+
+-L::
+--life-used-threshold=::
+       Set <value> for the life used warning alert threshold.
+
+--life-used-alert=::
+       Enable or disable the life used warning alert.
+       Options are 'on' or 'off'.
+
+-O::
+--over-temperature-threshold=::
+       Set <value> for the device over temperature warning alert threshold.
+
+--over-temperature-alert=::
+       Enable or disable the device over temperature warning alert.
+       Options are 'on' or 'off'.
+
+-U::
+--under-temperature-threshold=::
+       Set <value> for the device under temperature warning alert threshold.
+
+--under-temperature-alert=::
+       Enable or disable the device under temperature warning alert.
+       Options are 'on' or 'off'.
+
+-V::
+--volatile-mem-err-threshold=::
+       Set <value> for the corrected volatile memory error warning alert
+       threshold.
+
+--volatile-mem-err-alert=::
+       Enable or disable the corrected volatile memory error warning alert.
+       Options are 'on' or 'off'.
+
+-P::
+--pmem-err-threshold=::
+       Set <value> for the corrected persistent memory error warning alert
+       threshold.
+
+--pmem-err-alert=::
+       Enable or disable the corrected persistent memory error warning alert.
+       Options are 'on' or 'off'.
+
+SEE ALSO
+--------
+CXL-3.0 8.2.9.8.3.3
diff --git a/Documentation/cxl/meson.build b/Documentation/cxl/meson.build
index c553357..865aad5 100644
--- a/Documentation/cxl/meson.build
+++ b/Documentation/cxl/meson.build
@@ -47,6 +47,7 @@ cxl_manpages = [
   'cxl-destroy-region.txt',
   'cxl-monitor.txt',
   'cxl-update-firmware.txt',
+  'cxl-set-alert-config.txt',
 ]
 
 foreach man : cxl_manpages
diff --git a/cxl/builtin.h b/cxl/builtin.h
index 3ec6c6c..2c46a82 100644
--- a/cxl/builtin.h
+++ b/cxl/builtin.h
@@ -15,6 +15,7 @@ int cmd_enable_memdev(int argc, const char **argv, struct cxl_ctx *ctx);
 int cmd_reserve_dpa(int argc, const char **argv, struct cxl_ctx *ctx);
 int cmd_free_dpa(int argc, const char **argv, struct cxl_ctx *ctx);
 int cmd_update_fw(int argc, const char **argv, struct cxl_ctx *ctx);
+int cmd_set_alert_config(int argc, const char **argv, struct cxl_ctx *ctx);
 int cmd_disable_port(int argc, const char **argv, struct cxl_ctx *ctx);
 int cmd_enable_port(int argc, const char **argv, struct cxl_ctx *ctx);
 int cmd_set_partition(int argc, const char **argv, struct cxl_ctx *ctx);
diff --git a/cxl/cxl.c b/cxl/cxl.c
index e1524b8..bf4822f 100644
--- a/cxl/cxl.c
+++ b/cxl/cxl.c
@@ -69,6 +69,7 @@ static struct cmd_struct commands[] = {
        { "reserve-dpa", .c_fn = cmd_reserve_dpa },
        { "free-dpa", .c_fn = cmd_free_dpa },
        { "update-firmware", .c_fn = cmd_update_fw },
+       { "set-alert-config", .c_fn = cmd_set_alert_config },
        { "disable-port", .c_fn = cmd_disable_port },
        { "enable-port", .c_fn = cmd_enable_port },
        { "set-partition", .c_fn = cmd_set_partition },
diff --git a/cxl/memdev.c b/cxl/memdev.c
index f6a2d3f..2dd2e7f 100644
--- a/cxl/memdev.c
+++ b/cxl/memdev.c
@@ -38,10 +38,38 @@ static struct parameters {
        const char *type;
        const char *size;
        const char *decoder_filter;
+       const char *life_used_threshold;
+       const char *dev_over_temperature_threshold;
+       const char *dev_under_temperature_threshold;
+       const char *corrected_volatile_mem_err_threshold;
+       const char *corrected_pmem_err_threshold;
+       const char *life_used_alert;
+       const char *dev_over_temperature_alert;
+       const char *dev_under_temperature_alert;
+       const char *corrected_volatile_mem_err_alert;
+       const char *corrected_pmem_err_alert;
 } param;
 
 static struct log_ctx ml;
 
+struct alert_context {	/* set-alert-config request parsed from 'param' */
+       int valid_alert_actions;	/* bit (1 << event) set for each event given 'on' or 'off' */
+       int enable_alert_actions;	/* bit (1 << event) set only for events given 'on' */
+       int life_used_threshold;	/* this and the fields below: strtol() of the option string */
+       int dev_over_temperature_threshold;
+       int dev_under_temperature_threshold;
+       int corrected_volatile_mem_err_threshold;
+       int corrected_pmem_err_threshold;
+};
+
+enum cxl_setalert_event {	/* values are used as bit positions: 1 << event */
+       CXL_SETALERT_LIFE_USED,	/* threshold validated as 0..100 */
+       CXL_SETALERT_OVER_TEMP,	/* threshold validated as SHRT_MIN..SHRT_MAX */
+       CXL_SETALERT_UNDER_TEMP,	/* threshold validated as SHRT_MIN..SHRT_MAX */
+       CXL_SETALERT_VOLATILE_MEM_ERROR,	/* threshold validated as 0..USHRT_MAX */
+       CXL_SETALERT_PMEM_ERROR,	/* threshold validated as 0..USHRT_MAX */
+};
+
 enum cxl_setpart_type {
        CXL_SETPART_PMEM,
        CXL_SETPART_VOLATILE,
@@ -99,6 +127,36 @@ OPT_BOOLEAN('c', "cancel", &param.cancel,                            \
 OPT_BOOLEAN('w', "wait", &param.wait,                                \
            "wait for firmware update to complete before returning")
 
+#define SET_ALERT_OPTIONS() /* per alert: a threshold string + an on/off string */ \
+OPT_STRING('L', "life-used-threshold", &param.life_used_threshold,            \
+          "threshold", "threshold value for life used warning alert"),       \
+OPT_STRING('\0', "life-used-alert", &param.life_used_alert,                   \
+          "'on' or 'off'", "enable or disable life used warning alert"),     \
+OPT_STRING('O', "over-temperature-threshold",                                 \
+          &param.dev_over_temperature_threshold, "threshold",                \
+          "threshold value for device over temperature warning alert"),      \
+OPT_STRING('\0', "over-temperature-alert",                                    \
+          &param.dev_over_temperature_alert, "'on' or 'off'",                \
+          "enable or disable device over temperature warning alert"),        \
+OPT_STRING('U', "under-temperature-threshold",                                \
+          &param.dev_under_temperature_threshold, "threshold",               \
+          "threshold value for device under temperature warning alert"),     \
+OPT_STRING('\0', "under-temperature-alert",                                   \
+          &param.dev_under_temperature_alert, "'on' or 'off'",               \
+          "enable or disable device under temperature warning alert"),       \
+OPT_STRING('V', "volatile-mem-err-threshold",                                 \
+          &param.corrected_volatile_mem_err_threshold, "threshold",          \
+          "threshold value for corrected volatile mem error warning alert"), \
+OPT_STRING('\0', "volatile-mem-err-alert",                                    \
+          &param.corrected_volatile_mem_err_alert, "'on' or 'off'",          \
+          "enable or disable corrected volatile mem error warning alert"),   \
+OPT_STRING('P', "pmem-err-threshold",                                         \
+          &param.corrected_pmem_err_threshold, "threshold",                  \
+          "threshold value for corrected pmem error warning alert"),         \
+OPT_STRING('\0', "pmem-err-alert",                                            \
+          &param.corrected_pmem_err_alert, "'on' or 'off'",                  \
+          "enable or disable corrected pmem error warning alert")
+
 static const struct option read_options[] = {
        BASE_OPTIONS(),
        LABEL_OPTIONS(),
@@ -155,6 +213,12 @@ static const struct option update_fw_options[] = {
        OPT_END(),
 };
 
+static const struct option set_alert_options[] = {	/* passed to memdev_action() by cmd_set_alert_config() */
+       BASE_OPTIONS(),
+       SET_ALERT_OPTIONS(),
+       OPT_END(),
+};
+
 enum reserve_dpa_mode {
        DPA_ALLOC,
        DPA_FREE,
@@ -706,6 +770,148 @@ static int action_update_fw(struct cxl_memdev *memdev,
        return rc;
 }
 
+/*
+ * Range-check a parsed warning threshold for @event.
+ *
+ * Life used accepts 0..100, the over/under temperature events accept
+ * SHRT_MIN..SHRT_MAX, and every remaining event accepts 0..USHRT_MAX.
+ * Returns 0 when @threshold is in range; otherwise logs the rejected value
+ * and returns -EINVAL.
+ */
+static int validate_alert_threshold(enum cxl_setalert_event event,
+                                   int threshold)
+{
+       switch (event) {
+       case CXL_SETALERT_LIFE_USED:
+               if (threshold >= 0 && threshold <= 100)
+                       return 0;
+               log_err(&ml, "Invalid life used threshold: %d\n", threshold);
+               return -EINVAL;
+       case CXL_SETALERT_OVER_TEMP:
+       case CXL_SETALERT_UNDER_TEMP:
+               if (threshold >= SHRT_MIN && threshold <= SHRT_MAX)
+                       return 0;
+               log_err(&ml, "Invalid device temperature threshold: %d\n",
+                       threshold);
+               return -EINVAL;
+       default:
+               /* corrected volatile / persistent memory error events */
+               if (threshold >= 0 && threshold <= USHRT_MAX)
+                       return 0;
+               log_err(&ml, "Invalid corrected mem error threshold: %d\n",
+                       threshold);
+               return -EINVAL;
+       }
+}
+
+/*
+ * alert_param_set_threshold() - fold one alert's '--<x>-alert' and
+ * '--<x>-threshold' option strings from 'param' into the local 'alertctx'.
+ *
+ * Must be expanded inside a function returning int that has 'alertctx'
+ * (struct alert_context) and 'rc' (int) in scope; on bad input it returns
+ * -EINVAL (or the validate_alert_threshold() error) directly out of that
+ * function. Accepted combinations:
+ *   neither option given         -> event left untouched
+ *   alert "on" plus a threshold  -> event marked valid and enabled
+ *   alert "off", no threshold    -> event marked valid and disabled
+ *
+ * The threshold is parsed into a long and rejected when it is empty, has
+ * trailing characters, overflowed strtol() or does not fit in an int. Only
+ * then is it narrowed, so oversized input can no longer wrap into a value
+ * that passes validate_alert_threshold().
+ */
+#define alert_param_set_threshold(arg, alert_event)                           \
+{                                                                             \
+       if (!param.arg##_alert) {                                             \
+               if (param.arg##_threshold) {                                  \
+                       log_err(&ml, "Action not specified\n");               \
+                       return -EINVAL;                                       \
+               }                                                             \
+       } else if (strcmp(param.arg##_alert, "on") == 0) {                    \
+               if (param.arg##_threshold) {                                  \
+                       char *endptr;                                         \
+                       long val;                                             \
+                                                                             \
+                       errno = 0;                                            \
+                       val = strtol(param.arg##_threshold, &endptr, 10);     \
+                       if (errno || endptr == param.arg##_threshold ||       \
+                           endptr[0] != '\0' || val < INT_MIN ||             \
+                           val > INT_MAX) {                                  \
+                               log_err(&ml, "Invalid threshold: %s\n",       \
+                                       param.arg##_threshold);               \
+                               return -EINVAL;                               \
+                       }                                                     \
+                       alertctx.arg##_threshold = (int)val;                  \
+                       rc = validate_alert_threshold(                        \
+                               alert_event, alertctx.arg##_threshold);       \
+                       if (rc != 0)                                          \
+                               return rc;                                    \
+                       alertctx.valid_alert_actions |= 1 << alert_event;     \
+                       alertctx.enable_alert_actions |= 1 << alert_event;    \
+               } else {                                                      \
+                       log_err(&ml, "Threshold not specified\n");            \
+                       return -EINVAL;                                       \
+               }                                                             \
+       } else if (strcmp(param.arg##_alert, "off") == 0) {                   \
+               if (!param.arg##_threshold) {                                 \
+                       alertctx.valid_alert_actions |= 1 << alert_event;     \
+                       alertctx.enable_alert_actions &= ~(1 << alert_event); \
+               } else {                                                      \
+                       log_err(&ml, "Disable not require threshold\n");      \
+                       return -EINVAL;                                       \
+               }                                                             \
+       } else {                                                              \
+               log_err(&ml, "Invalid action: %s\n", param.arg##_alert);      \
+               return -EINVAL;                                               \
+       }                                                                     \
+}
+
+#define setup_threshold_field(arg) /* needs 'cmd' and 'alertctx' in scope */  \
+{                                                                             \
+       if (param.arg##_threshold) /* only when the option was given */       \
+               cxl_cmd_alert_config_set_##arg##_prog_warn_threshold(         \
+                       cmd, alertctx.arg##_threshold);                       \
+}
+
+static int action_set_alert_config(struct cxl_memdev *memdev,
+                                  struct action_context *actx)
+{	/* Parse the alert options in 'param', submit them to @memdev, queue its JSON */
+       const char *devname = cxl_memdev_get_devname(memdev);
+       struct cxl_cmd *cmd;	/* name is referenced by setup_threshold_field() */
+       struct alert_context alertctx = { 0 };	/* filled in by alert_param_set_threshold() */
+       struct json_object *jmemdev;
+       unsigned long flags;
+       int rc = 0;	/* also assigned inside alert_param_set_threshold() */
+
+       alert_param_set_threshold(life_used, CXL_SETALERT_LIFE_USED) /* each expansion may return an error */
+       alert_param_set_threshold(dev_over_temperature, CXL_SETALERT_OVER_TEMP)
+       alert_param_set_threshold(dev_under_temperature,
+                                 CXL_SETALERT_UNDER_TEMP)
+       alert_param_set_threshold(corrected_volatile_mem_err,
+                                 CXL_SETALERT_VOLATILE_MEM_ERROR)
+       alert_param_set_threshold(corrected_pmem_err, CXL_SETALERT_PMEM_ERROR)
+       if (alertctx.valid_alert_actions == 0) {	/* no '--*-alert' option was given */
+               log_err(&ml, "No action specified\n");
+               return -EINVAL;
+       }
+
+       cmd = cxl_cmd_new_set_alert_config(memdev);
+       if (!cmd) {
+               rc = -ENXIO;
+               goto out_err;	/* nothing to unref yet */
+       }
+
+       setup_threshold_field(life_used) /* thresholds are written only for options that were given */
+       setup_threshold_field(dev_over_temperature)
+       setup_threshold_field(dev_under_temperature)
+       setup_threshold_field(corrected_volatile_mem_err)
+       setup_threshold_field(corrected_pmem_err)
+       cxl_cmd_alert_config_set_valid_alert_actions(
+               cmd, alertctx.valid_alert_actions);
+       cxl_cmd_alert_config_set_enable_alert_actions(
+               cmd, alertctx.enable_alert_actions);
+
+       rc = cxl_cmd_submit(cmd);
+       if (rc < 0) {
+               log_err(&ml, "cmd submission failed: %s\n", strerror(-rc));
+               goto out_cmd;
+       }
+
+       rc = cxl_cmd_get_mbox_status(cmd);
+       if (rc != 0) {	/* any non-zero mailbox status is reported as -ENXIO */
+               log_err(&ml, "%s: mbox status: %d\n", __func__, rc);
+               rc = -ENXIO;
+       }
+
+out_cmd:
+       cxl_cmd_unref(cmd);
+out_err:
+       if (rc)
+               log_err(&ml, "%s error: %s\n", devname, strerror(-rc));
+
+       flags = UTIL_JSON_ALERT_CONFIG;	/* reached on success and on command failure alike */
+       if (actx->f_out == stdout && isatty(1))	/* stdout output going to a terminal */
+               flags |= UTIL_JSON_HUMAN;
+       jmemdev = util_cxl_memdev_to_json(memdev, flags);
+       if (actx->jdevs && jmemdev)	/* NOTE(review): jmemdev is not released here when jdevs is NULL - confirm ownership */
+               json_object_array_add(actx->jdevs, jmemdev);
+
+       return rc;
+}
+
 static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
                         int (*action)(struct cxl_memdev *memdev,
                                       struct action_context *actx),
@@ -749,7 +955,8 @@ static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
        }
 
        if (action == action_setpartition || action == action_reserve_dpa ||
-           action == action_free_dpa || action == action_update_fw)
+           action == action_free_dpa || action == action_update_fw ||
+           action == action_set_alert_config)
                actx.jdevs = json_object_new_array();
 
        if (err == argc) {
@@ -968,3 +1175,14 @@ int cmd_update_fw(int argc, const char **argv, struct cxl_ctx *ctx)
 
        return count >= 0 ? 0 : EXIT_FAILURE;
 }
+
+/*
+ * 'cxl set-alert-config' entry point: runs action_set_alert_config() through
+ * memdev_action() with the set_alert_options table, logs how many memdevs
+ * were handled, and returns 0 unless memdev_action() reported a negative
+ * count, in which case EXIT_FAILURE is returned.
+ */
+int cmd_set_alert_config(int argc, const char **argv, struct cxl_ctx *ctx)
+{
+       const char *usage =
+               "cxl set-alert-config <mem0> [<mem1>..<memN>] [<options>]";
+       int count, handled;
+
+       count = memdev_action(argc, argv, ctx, action_set_alert_config,
+                             set_alert_options, usage);
+       /* a negative count is an error; report it as zero devices */
+       handled = count < 0 ? 0 : count;
+       log_info(&ml, "set alert configuration for %d mem%s\n", handled,
+                count > 1 ? "s" : "");
+
+       if (count < 0)
+               return EXIT_FAILURE;
+       return 0;
+}
-- 
2.17.1


Reply via email to