On 8/22/25 7:59 PM, [email protected] wrote:
> From: Alison Schofield <[email protected]>
>
> The CXL kernel driver recently added support to inject and clear
> poison in a region by specifying an offset. Add a test case to the
> existing cxl-poison unit test that demonstrates how to use the new
> debugfs attributes. Use the kernel trace log to validate the
> round-trip address translations.
>
> SKIP, do not fail, if the new debugfs attributes are not present.
>
> See the kernel ABI documentation for usage:
> Documentation/ABI/testing/debugfs-cxl
>
> Signed-off-by: Alison Schofield <[email protected]>
Reviewed-by: Dave Jiang <[email protected]>
I'm not a bash expert, but this LGTM.
> ---
>
> Changes in v3:
> Replace string compare with boolean value for expect_fail (Marc)
> Add local declarations in new or modified funcs (Marc)
> De-duplicate clear & poison funcs (Marc)
> Remove stderr redirection (Marc)
>
> Changes in v2:
> Add test_poison_by_region_offset_negative set of test cases
>
>
> test/cxl-poison.sh | 132 +++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> index 6ed890bc666c..f941f3cbcffd 100644
> --- a/test/cxl-poison.sh
> +++ b/test/cxl-poison.sh
> @@ -63,20 +63,58 @@ create_x2_region()
> # When cxl-cli support for inject and clear arrives, replace
> # the writes to /sys/kernel/debug with the new cxl commands.
>
> +_do_poison_sysfs()
> +{
> + local action="$1" dev="$2" addr="$3"
> + local expect_fail=${4:-false}
> +
> + if "$expect_fail"; then
> + if echo "$addr" > "/sys/kernel/debug/cxl/$dev/${action}_poison"; then
> + echo "Expected ${action}_poison to fail for $addr"
> + err "$LINENO"
> + fi
> + else
> + echo "$addr" > "/sys/kernel/debug/cxl/$dev/${action}_poison"
> + fi
> +}
> +
> inject_poison_sysfs()
> {
> - memdev="$1"
> - addr="$2"
> -
> - echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison
> + _do_poison_sysfs 'inject' "$@"
> }
>
> clear_poison_sysfs()
> {
> - memdev="$1"
> - addr="$2"
> + _do_poison_sysfs 'clear' "$@"
> +}
>
> - echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison
> +check_trace_entry()
> +{
> + local expected_region="$1"
> + local expected_hpa="$2"
> +
> + local trace_line
> + trace_line=$(grep "cxl_poison" /sys/kernel/tracing/trace | tail -n 1)
> + if [[ -z "$trace_line" ]]; then
> + echo "No cxl_poison trace event found"
> + err "$LINENO"
> + fi
> +
> + local trace_region trace_hpa
> + trace_region=$(echo "$trace_line" | grep -o 'region=[^ ]*' | cut -d= -f2)
> + trace_hpa=$(echo "$trace_line" | grep -o 'hpa=0x[0-9a-fA-F]\+' | cut -d= -f2)
> +
> + if [[ "$trace_region" != "$expected_region" ]]; then
> + echo "Expected region $expected_region not found in trace"
> + echo "$trace_line"
> + err "$LINENO"
> + fi
> +
> + if [[ "$trace_hpa" != "$expected_hpa" ]]; then
> + echo "Expected HPA $expected_hpa not found in trace"
> + echo "$trace_line"
> + err "$LINENO"
> + fi
> }
>
> validate_poison_found()
> @@ -97,7 +135,7 @@ validate_poison_found()
> fi
> }
>
> -test_poison_by_memdev()
> +test_poison_by_memdev_by_dpa()
> {
> find_memdev
> inject_poison_sysfs "$memdev" "0x40000000"
> @@ -113,9 +151,8 @@ test_poison_by_memdev()
> validate_poison_found "-m $memdev" 0
> }
>
> -test_poison_by_region()
> +test_poison_by_region_by_dpa()
> {
> - create_x2_region
> inject_poison_sysfs "$mem0" "0x40000000"
> inject_poison_sysfs "$mem1" "0x40000000"
> validate_poison_found "-r $region" 2
> @@ -125,13 +162,78 @@ test_poison_by_region()
> validate_poison_found "-r $region" 0
> }
>
> -# Turn tracing on. Note that 'cxl list --media-errors' toggles the tracing.
> -# Turning it on here allows the test user to also view inject and clear
> -# trace events.
> +test_poison_by_region_offset()
> +{
> + local base gran hpa1 hpa2
> + base=$(cat /sys/bus/cxl/devices/"$region"/resource)
> + gran=$(cat /sys/bus/cxl/devices/"$region"/interleave_granularity)
> +
> + # Test two HPA addresses: base and base + granularity
> + # This hits the two memdevs in the region interleave.
> + hpa1=$(printf "0x%x" $((base)))
> + hpa2=$(printf "0x%x" $((base + gran)))
> +
> + # Inject at each region offset and check the result using the HPAs.
> + # The ABI takes an offset, but the trace event is checked against the HPA.
> +
> + inject_poison_sysfs "$region" 0
> + check_trace_entry "$region" "$hpa1"
> + inject_poison_sysfs "$region" "$gran"
> + check_trace_entry "$region" "$hpa2"
> + validate_poison_found "-r $region" 2
> +
> + clear_poison_sysfs "$region" 0
> + check_trace_entry "$region" "$hpa1"
> + clear_poison_sysfs "$region" "$gran"
> + check_trace_entry "$region" "$hpa2"
> + validate_poison_found "-r $region" 0
> +}
> +
> +test_poison_by_region_offset_negative()
> +{
> + local region_size cache_size cache_offset exceed_offset large_offset
> + region_size=$(cat /sys/bus/cxl/devices/"$region"/size)
> + cache_size=0
> +
> + # This case is a no-op until cxl-test ELC mocking arrives
> + # Try to get cache_size if the attribute exists
> + if [ -f "/sys/bus/cxl/devices/$region/cache_size" ]; then
> + cache_size=$(cat /sys/bus/cxl/devices/"$region"/cache_size)
> + fi
> +
> + # Offset within extended linear cache (if cache_size > 0)
> + if [[ $cache_size -gt 0 ]]; then
> + cache_offset=$((cache_size - 1))
> + echo "Testing offset within cache: $cache_offset (cache_size: $cache_size)"
> + inject_poison_sysfs "$region" "$cache_offset" true
> + clear_poison_sysfs "$region" "$cache_offset" true
> + else
> + echo "Skipping cache test - cache_size is 0"
> + fi
> +
> + # Offset exceeds region size
> + exceed_offset=$((region_size))
> + inject_poison_sysfs "$region" "$exceed_offset" true
> + clear_poison_sysfs "$region" "$exceed_offset" true
> +
> + # Offset exceeds region size by a lot
> + large_offset=$((region_size * 2))
> + inject_poison_sysfs "$region" "$large_offset" true
> + clear_poison_sysfs "$region" "$large_offset" true
> +}
> +
> +# Clear old trace events, enable cxl_poison, enable global tracing
> +echo "" > /sys/kernel/tracing/trace
> echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
> +echo 1 > /sys/kernel/tracing/tracing_on
>
> -test_poison_by_memdev
> -test_poison_by_region
> +test_poison_by_memdev_by_dpa
> +create_x2_region
> +test_poison_by_region_by_dpa
> +[ -f "/sys/kernel/debug/cxl/$region/inject_poison" ] ||
> + do_skip "test cases requires inject by region kernel support"
> +test_poison_by_region_offset
> +test_poison_by_region_offset_negative
>
> check_dmesg "$LINENO"
>