Exercise the full driver path on real Grace and Vera hardware using
ACPI EINJ to inject CPER sections and validate the kernel log output.
KUnit covers the parser in isolation; this test covers the path from
firmware notification through GUID dispatch to decoded output.

Signed-off-by: Kai-Heng Feng <[email protected]>
---
 tools/testing/selftests/firmware/Makefile     |   4 +-
 tools/testing/selftests/firmware/config       |   5 +
 tools/testing/selftests/firmware/einj_lib.sh  | 189 ++++++++++++++++++
 .../selftests/firmware/ghes_nvidia_einj.sh    | 144 +++++++++++++
 .../firmware/ghes_nvidia_einj_profiles.sh     |  46 +++++
 5 files changed, 386 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/firmware/einj_lib.sh
 create mode 100755 tools/testing/selftests/firmware/ghes_nvidia_einj.sh
 create mode 100755 tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh

diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 7992969deaa2..b753dd123860 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -3,8 +3,8 @@
 CFLAGS = -Wall \
          -O2

-TEST_PROGS := fw_run_tests.sh
-TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_upload.sh fw_lib.sh
+TEST_PROGS := fw_run_tests.sh ghes_nvidia_einj.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_upload.sh fw_lib.sh einj_lib.sh ghes_nvidia_einj_profiles.sh
 TEST_GEN_FILES := fw_namespace

 include ../lib.mk
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
index 6e402519b117..1b68e638d0b7 100644
--- a/tools/testing/selftests/firmware/config
+++ b/tools/testing/selftests/firmware/config
@@ -4,3 +4,8 @@ CONFIG_FW_LOADER_USER_HELPER=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_FW_UPLOAD=y
+CONFIG_DEBUG_FS=y
+CONFIG_ACPI_APEI=y
+CONFIG_ACPI_APEI_GHES=y
+CONFIG_ACPI_APEI_EINJ=y
+CONFIG_ACPI_APEI_GHES_NVIDIA=y
diff --git a/tools/testing/selftests/firmware/einj_lib.sh b/tools/testing/selftests/firmware/einj_lib.sh
new file mode 100644
index 000000000000..ca569a9fe5b0
--- /dev/null
+++ b/tools/testing/selftests/firmware/einj_lib.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+EINJ_TABLE=/sys/firmware/acpi/tables/EINJ
+EINJ_DEBUGFS=/sys/kernel/debug/apei/einj
+NVIDIA_PLATFORM_GLOB=/sys/bus/platform/devices/NVDA2012:*
+NVIDIA_DRIVER_DIR=/sys/bus/platform/drivers/nvidia-ghes
+
+# Print the reason the test cannot run on stderr, prefixed with the script
+# name, then terminate with the kselftest SKIP code.
+#   $1 - human-readable reason
+einj_skip()
+{
+       local reason=$1
+
+       echo "$0: $reason" >&2
+       exit "$ksft_skip"
+}
+
+# Skip the test unless the effective user id is 0.
+einj_require_root()
+{
+       if [ "$(id -u)" -eq 0 ]; then
+               return 0
+       fi
+
+       einj_skip "must be run as root"
+}
+
+# Skip the test unless a debugfs instance is mounted at /sys/kernel/debug.
+# Testing only for the directory is not enough: the mount-point directory can
+# exist while nothing is mounted on it, so look the mount up in /proc/mounts.
+einj_require_debugfs()
+{
+       if ! grep -qs '^[^ ]* /sys/kernel/debug debugfs ' /proc/mounts; then
+               einj_skip "debugfs is not mounted at /sys/kernel/debug"
+       fi
+}
+
+einj_require_einj()
+{
+       [ -e "$EINJ_TABLE" ] || einj_skip "ACPI EINJ table is missing"
+       if [ ! -d "$EINJ_DEBUGFS" ]; then
+               modprobe einj 2>/dev/null || true
+       fi
+       [ -d "$EINJ_DEBUGFS" ] || einj_skip "EINJ debugfs directory is missing"
+}
+
+# Skip the test unless the EINJ debugfs directory exposes both the "vendor"
+# and "vendor_flags" entries.
+einj_require_vendor_einj()
+{
+       [ -e "$EINJ_DEBUGFS/vendor" ] ||
+               einj_skip "NVIDIA vendor EINJ metadata is missing"
+       [ -e "$EINJ_DEBUGFS/vendor_flags" ] ||
+               einj_skip "NVIDIA vendor EINJ flags are missing"
+}
+
+einj_require_available_error_type()
+{
+       local available
+
+       available=$(einj_read_trimmed_value available_error_type)
+       [ -n "$available" ] || einj_skip "available_error_type is missing"
+}
+
+# Print the content of one EINJ debugfs entry with every newline removed.
+#   $1 - entry name relative to $EINJ_DEBUGFS
+einj_read_trimmed_value()
+{
+       local entry=$1
+
+       einj_read_value "$entry" |
+               tr -d '\n'
+}
+
+# Skip the test unless the given EINJ debugfs entry is writable.
+#   $1 - entry name relative to $EINJ_DEBUGFS
+einj_require_writable_value()
+{
+       local file=$1
+
+       if [ ! -w "$EINJ_DEBUGFS/$file" ]; then
+               einj_skip "$file is not writable"
+       fi
+}
+
+# Skip the test unless every EINJ entry that a profile programs is writable.
+einj_require_writable_profile()
+{
+       local file
+
+       for file in error_type flags vendor_flags \
+                   param1 param2 param3 param4 notrigger; do
+               einj_require_writable_value "$file"
+       done
+}
+
+# Print the first platform device matching $NVIDIA_PLATFORM_GLOB whose
+# "driver" symlink resolves to $NVIDIA_DRIVER_DIR.
+# Returns 0 when such a device was printed, 1 when none was found.
+einj_find_bound_nvidia_device()
+{
+       local dev
+       local driver
+
+       # Intentionally unquoted so the stored glob pattern is expanded here.
+       for dev in $NVIDIA_PLATFORM_GLOB; do
+               # An unmatched glob stays literal; skip that non-existent path.
+               [ -e "$dev" ] || continue
+               driver=$(readlink -f "$dev/driver" 2>/dev/null) || continue
+               if [ "$driver" = "$NVIDIA_DRIVER_DIR" ]; then
+                       echo "$dev"
+                       return 0
+               fi
+       done
+
+       return 1
+}
+
+einj_require_bound_nvidia_device()
+{
+       local dev
+
+       dev=$(einj_find_bound_nvidia_device) || einj_skip "no bound NVIDIA GHES 
device"
+       echo "$dev"
+}
+
+# Print the raw content of one EINJ debugfs entry.
+#   $1 - entry name relative to $EINJ_DEBUGFS
+einj_read_value()
+{
+       local entry=$1
+       local path="$EINJ_DEBUGFS/$entry"
+
+       cat "$path"
+}
+
+# Write a value followed by a newline to one EINJ debugfs entry.
+#   $1 - entry name relative to $EINJ_DEBUGFS
+#   $2 - value to write
+einj_write_value()
+{
+       local entry=$1
+       local data=$2
+       local path="$EINJ_DEBUGFS/$entry"
+
+       printf '%s\n' "$data" > "$path"
+}
+
+einj_restore_value()
+{
+       local file=$1
+       local value=$2
+
+       # Some EINJ controls read back as an empty string when unset, but the
+       # debugfs write handler has no matching "clear" operation.
+       [ -n "$value" ] || return 0
+       einj_write_value "$file" "$value"
+}
+
+# Snapshot the current content of every EINJ entry the test modifies into
+# global EINJ_SAVED_* variables, for einj_restore_state to write back later.
+# Each read is a plain assignment from a command substitution, so with errexit
+# active a failing read aborts the script at that point.
+einj_save_state()
+{
+       EINJ_SAVED_ERROR_TYPE=$(einj_read_value error_type)
+       EINJ_SAVED_FLAGS=$(einj_read_value flags)
+       EINJ_SAVED_PARAM1=$(einj_read_value param1)
+       EINJ_SAVED_PARAM2=$(einj_read_value param2)
+       EINJ_SAVED_PARAM3=$(einj_read_value param3)
+       EINJ_SAVED_PARAM4=$(einj_read_value param4)
+       EINJ_SAVED_VENDOR_FLAGS=$(einj_read_value vendor_flags)
+       EINJ_SAVED_NOTRIGGER=$(einj_read_value notrigger)
+}
+
+# Write the values captured by einj_save_state back to the EINJ entries.
+# Does nothing (returns 0) when no snapshot was taken.
+# NOTE(review): when called from a context where errexit is ignored (e.g. as
+# an "if" condition), only the status of the last restore is returned; earlier
+# failed writes go unnoticed - confirm this is acceptable.
+einj_restore_state()
+{
+       # "${VAR+x}" is non-empty whenever VAR is set, even to an empty string.
+       [ -n "${EINJ_SAVED_ERROR_TYPE+x}" ] || return 0
+
+       einj_restore_value error_type "$EINJ_SAVED_ERROR_TYPE"
+       einj_restore_value flags "$EINJ_SAVED_FLAGS"
+       einj_restore_value param1 "$EINJ_SAVED_PARAM1"
+       einj_restore_value param2 "$EINJ_SAVED_PARAM2"
+       einj_restore_value param3 "$EINJ_SAVED_PARAM3"
+       einj_restore_value param4 "$EINJ_SAVED_PARAM4"
+       einj_restore_value vendor_flags "$EINJ_SAVED_VENDOR_FLAGS"
+       einj_restore_value notrigger "$EINJ_SAVED_NOTRIGGER"
+}
+
+# Write a marker line to the kernel log and print the same line on stdout so
+# the caller can later locate it in dmesg output.
+#   $1 - tag embedded in the marker
+# The marker also embeds the shell PID and a $RANDOM value.
+einj_emit_kmsg_marker()
+{
+       local tag=$1
+       local line="ghes-nvidia-einj:${tag}:$$:${RANDOM}"
+
+       printf '%s\n' "$line" > /dev/kmsg
+       printf '%s\n' "$line"
+}
+
+einj_capture_dmesg_after_marker()
+{
+       local marker=$1
+
+       dmesg | awk -v marker="$marker" '
+               found { print }
+               index($0, marker) { found = 1 }
+       '
+}
+
+einj_wait_for_dmesg_after_marker_contains()
+{
+       local marker=$1
+       local needle=$2
+       local timeout=${3:-10}
+       local i
+       local slice
+
+       for i in $(seq 1 "$timeout"); do
+               slice=$(einj_capture_dmesg_after_marker "$marker")
+               if printf '%s\n' "$slice" | grep -Fq "$needle"; then
+                       printf '%s\n' "$slice"
+                       return 0
+               fi
+               sleep 1
+       done
+
+       return 1
+}
diff --git a/tools/testing/selftests/firmware/ghes_nvidia_einj.sh b/tools/testing/selftests/firmware/ghes_nvidia_einj.sh
new file mode 100755
index 000000000000..6fc4d3189235
--- /dev/null
+++ b/tools/testing/selftests/firmware/ghes_nvidia_einj.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+TEST_DIR=$(dirname "$0")
+source "$TEST_DIR/einj_lib.sh"
+source "$TEST_DIR/ghes_nvidia_einj_profiles.sh"
+
+einj_assert_nvidia_cper_output()
+{
+       local profile=$1
+       local output=$2
+
+       if printf '%s\n' "$output" | grep -Fq 'Malformed NVIDIA'; then
+               echo "$0: $profile produced malformed NVIDIA CPER output" >&2
+               printf '%s\n' "$output" >&2
+               return 1
+       fi
+
+       if printf '%s\n' "$output" | grep -Fq 'NVIDIA Grace CPER section'; then
+               if ! printf '%s\n' "$output" | grep -Fq 'signature:'; then
+                       echo "$0: $profile Grace output missing signature line" 
>&2
+                       printf '%s\n' "$output" >&2
+                       return 1
+               fi
+               if ! printf '%s\n' "$output" | grep -Fq 'error_type:'; then
+                       echo "$0: $profile Grace output missing error_type 
line" >&2
+                       printf '%s\n' "$output" >&2
+                       return 1
+               fi
+               if ! printf '%s\n' "$output" | grep -Fq 'number_regs:'; then
+                       echo "$0: $profile Grace output missing number_regs 
line" >&2
+                       printf '%s\n' "$output" >&2
+                       return 1
+               fi
+               if ! printf '%s\n' "$output" | grep -Fq 'instance_base:'; then
+                       echo "$0: $profile Grace output missing instance_base 
line" >&2
+                       printf '%s\n' "$output" >&2
+                       return 1
+               fi
+               return 0
+       fi
+
+       if printf '%s\n' "$output" | grep -Fq 'NVIDIA Vera CPER section'; then
+               if ! printf '%s\n' "$output" | grep -Fq 'signature:'; then
+                       echo "$0: $profile Vera output missing signature line" 
>&2
+                       printf '%s\n' "$output" >&2
+                       return 1
+               fi
+               if ! printf '%s\n' "$output" | grep -Fq 'event_type:'; then
+                       echo "$0: $profile Vera output missing event_type line" 
>&2
+                       printf '%s\n' "$output" >&2
+                       return 1
+               fi
+               if ! printf '%s\n' "$output" | grep -Fq 'event_sub_type:'; then
+                       echo "$0: $profile Vera output missing event_sub_type 
line" >&2
+                       printf '%s\n' "$output" >&2
+                       return 1
+               fi
+               if ! printf '%s\n' "$output" | grep -Fq 'event_context_count:'; 
then
+                       echo "$0: $profile Vera output missing 
event_context_count line" >&2
+                       printf '%s\n' "$output" >&2
+                       return 1
+               fi
+               return 0
+       fi
+
+       echo "$0: $profile did not emit a recognized NVIDIA CPER section" >&2
+       printf '%s\n' "$output" >&2
+       return 1
+}
+
+einj_run_profile()
+{
+       local profile=$1
+       local marker
+       local output
+
+       if ! einj_select_profile "$profile"; then
+               echo "$0: unknown safe NVIDIA EINJ profile: $profile" >&2
+               return 1
+       fi
+
+       einj_require_writable_profile
+
+       printf '%s: running safe sample %s\n' "$0" "$profile"
+       marker=$(einj_emit_kmsg_marker "$profile")
+
+       einj_write_value error_type "$EINJ_PROFILE_ERROR_TYPE"
+       einj_write_value flags 0
+       einj_write_value vendor_flags "$EINJ_PROFILE_VENDOR_FLAGS"
+       einj_write_value param1 "$EINJ_PROFILE_PARAM1"
+       einj_write_value param2 "$EINJ_PROFILE_PARAM2"
+       einj_write_value param3 "$EINJ_PROFILE_PARAM3"
+       einj_write_value param4 "$EINJ_PROFILE_PARAM4"
+       einj_write_value notrigger 0
+       einj_write_value error_inject 1
+
+       output=$(einj_wait_for_dmesg_after_marker_contains "$marker" 
"$EINJ_PROFILE_BANNER" 10) || {
+               printf '%s: %s not supported on this platform\n' "$0" "$profile"
+               return "$ksft_skip"
+       }
+
+       einj_assert_nvidia_cper_output "$profile" "$output"
+}
+
+# EXIT-trap handler: restore the saved EINJ state, then exit.
+#   $1 - exit status the script was about to finish with
+# Exits with that status, or with 1 when the status was 0 but restoring the
+# EINJ state failed.
+einj_cleanup()
+{
+       local rc=$1
+
+       if einj_restore_state; then
+               exit "$rc"
+       fi
+
+       echo "$0: failed to restore EINJ state" >&2
+       if [ "$rc" -eq 0 ]; then
+               rc=1
+       fi
+
+       exit "$rc"
+}
+
+main()
+{
+       local profile
+       local passed=0
+
+       einj_require_root
+       einj_require_debugfs
+       einj_require_einj
+       einj_require_vendor_einj
+       einj_require_available_error_type
+       einj_save_state
+       trap 'einj_cleanup "$?"' EXIT
+
+       einj_require_bound_nvidia_device
+
+       for profile in $(einj_list_profiles); do
+               einj_run_profile "$profile" && passed=$((passed + 1)) || {
+                       [ "$?" -eq "$ksft_skip" ] || exit 1
+               }
+       done
+
+       [ "$passed" -gt 0 ] || einj_skip "no NVIDIA EINJ profiles produced 
output"
+}
+
+main "$@"
diff --git a/tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh b/tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh
new file mode 100755
index 000000000000..b25461d2238c
--- /dev/null
+++ b/tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# Run both architecture profiles on every platform; firmware silently ignores
+# selectors it does not support, so a timeout just means "not this platform".
+EINJ_PROFILE_NAMES="cmet_dump_status_grace cmet_dump_status_vera"
+
+# Print the names in $EINJ_PROFILE_NAMES, one per line.
+einj_list_profiles()
+{
+       # Unquoted on purpose: word splitting turns the list into one printf
+       # argument per name, and the format is reused for each argument.
+       printf '%s\n' $EINJ_PROFILE_NAMES
+}
+
+einj_select_profile()
+{
+       local profile=$1
+
+       case "$profile" in
+       cmet_dump_status_grace)
+               # Grace CMET dump/status: informational sample, selector 3.
+               EINJ_PROFILE_ERROR_TYPE=0x80000010
+               EINJ_PROFILE_VENDOR_FLAGS=1
+               EINJ_PROFILE_PARAM1=3
+               EINJ_PROFILE_PARAM2=0
+               EINJ_PROFILE_PARAM3=0
+               EINJ_PROFILE_PARAM4=0
+               EINJ_PROFILE_BANNER='NVIDIA Grace CPER section'
+               ;;
+       cmet_dump_status_vera)
+               # Vera CMET-NULL dump/status: informational sample, selector 0.
+               EINJ_PROFILE_ERROR_TYPE=0x80000010
+               EINJ_PROFILE_VENDOR_FLAGS=1
+               EINJ_PROFILE_PARAM1=0
+               EINJ_PROFILE_PARAM2=0
+               EINJ_PROFILE_PARAM3=0
+               EINJ_PROFILE_PARAM4=0
+               EINJ_PROFILE_BANNER='NVIDIA Vera CPER section'
+               ;;
+       *)
+               return 1
+               ;;
+       esac
+
+       return 0
+}
-- 
2.50.1 (Apple Git-155)


Reply via email to