> diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
> b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
> index 969f4deba9fd..efff0c515184 100644
> --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
> +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
[ ... ]
> @@ -855,6 +897,37 @@ static void test_uprobe_race(void)
> #define __NR_uprobe 336
> #endif
>
> +static void test_uprobe_red_zone(void)
> +{
> + struct uprobe_syscall_executed *skel;
> + struct bpf_link *link;
> + void *nop10_addr;
> + size_t offset;
> + int i;
> +
> + nop10_addr = find_nop10(uprobe_red_zone_test);
> + if (!ASSERT_NEQ(nop10_addr, NULL, "find_nop10"))
> + return;
> +
> + skel = uprobe_syscall_executed__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "open_and_load"))
> + return;
> +
> + offset = get_uprobe_offset(nop10_addr);
> + link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
> + 0, "/proc/self/exe", offset, NULL);
> + if (!ASSERT_OK_PTR(link, "attach_uprobe"))
> + goto cleanup;
> +
> + for (i = 0; i < 10; i++)
> + ASSERT_EQ(uprobe_red_zone_test(), 0, "red_zone_intact");
Does this loop execute too quickly to guarantee the uprobe optimization has
completed?
Uprobe optimization is performed asynchronously by a worker thread. Since this
is a tight loop with no delays or blocking, the 10 iterations might finish
before the background worker patches the code.
If all hits execute on the unoptimized INT3 path, the test could falsely pass
on kernels where the CALL-based optimization bug is present.
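(This concern was raised in the v1 review and remains unaddressed. The
reviewer's address and the message-id in the link below appear only in
obfuscated form, "[email protected]", so the link cannot be followed as written:
https://lore.kernel.org/bpf/[email protected]/)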
> +
> + bpf_link__destroy(link);
> +
> +cleanup:
> + uprobe_syscall_executed__destroy(skel);
> +}
[ ... ]
> diff --git a/tools/testing/selftests/bpf/usdt_2.c
> b/tools/testing/selftests/bpf/usdt_2.c
> index b359b389f6c0..5e38f8605b02 100644
> --- a/tools/testing/selftests/bpf/usdt_2.c
> +++ b/tools/testing/selftests/bpf/usdt_2.c
> @@ -13,4 +13,17 @@ void usdt_2(void)
> USDT(optimized_attach, usdt_2);
> }
>
> +static volatile unsigned long usdt_red_zone_arg1 = 0xDEADBEEF;
> +static volatile unsigned long usdt_red_zone_arg2 = 0xCAFEBABE;
> +static volatile unsigned long usdt_red_zone_arg3 = 0xFEEDFACE;
> +
> +void __attribute__((noinline)) usdt_red_zone_trigger(void)
> +{
> + unsigned long a1 = usdt_red_zone_arg1;
> + unsigned long a2 = usdt_red_zone_arg2;
> + unsigned long a3 = usdt_red_zone_arg3;
> +
> + USDT(optimized_attach, usdt_red_zone, a1, a2, a3);
> +}
Is this test reliable across different compiler versions and optimizations?
The USDT macro uses the "nor" inline assembly constraint, which allows the
compiler to allocate these operands to CPU registers.
Because there is no register pressure in usdt_red_zone_trigger(), modern
compilers might allocate a1, a2, and a3 directly to registers rather than the
stack's red zone.
If the operands are placed in registers, the uprobe CALL optimization
clobbering [rsp-8] will not corrupt them. The test could then pass even on
buggy kernels, i.e. a false pass that lets the bug go undetected.
Would it be safer to explicitly force these operands into the red zone using
inline assembly constraints, rather than depending on the compiler's register
allocator?
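(This concern was raised in the v1 and v3 reviews and remains unaddressed. The
reviewer's address appears only in obfuscated form, "[email protected]", and
the message-id in the link below is truncated:
https://lore.kernel.org/bpf/e91d31e0c1378fe970db6385c7af419e97af0c2acd37b3b8775a2fac02ea2...@mail.kernel.org/)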
---
An AI reviewed your patch. Please fix the bug, or reply by email explaining why it is not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26475828601