On Thu, Aug 14, 2025 at 04:07:39PM +0000, Jiawei Zhao wrote: > usdt_o2 is intended to exercise the SIB (Scale-Index-Base) argument > handling in libbpf's USDT path. With GCC 13 this reliably produced a > SIB-form argument (e.g. 8@(%rdx,%rax,8)), but with newer GCC (e.g. 15) > the compiler frequently optimizes the probe argument into a plain > register (e.g. 8@%rax) or a stack slot, so the test stops covering the > SIB code path and becomes flaky across toolchains. > > Force a SIB memory operand in the probe by: > * placing the base pointer into %rdx and the index into %rax using an > empty inline asm with output constraints ("=d", "=a") and matching > inputs > * immediately passing base[idx] to STAP_PROBE1. > * only enable on x86 platform. > > This makes the compiler encode the operand as SIB (base + index*8), > which in .note.stapsdt shows up as 8@(%rdx,%rax,8) regardless of GCC > version. A memory clobber and noinline prevent reordering/re-allocation > around the probe site. > > This change is x86_64-specific and does not alter program semantics; it > only stabilizes the USDT argument shape so the test consistently > validates SIB handling. Clang historically prefers stack temporaries for > such operands, but the selftests build with GCC, and this keeps behavior > stable across GCC versions without introducing a separate .S file. 
> > Signed-off-by: Jiawei Zhao <phoenix500...@163.com> > --- > .../selftests/bpf/prog_tests/usdt_o2.c | 20 ++++++++++++++----- > 1 file changed, 15 insertions(+), 5 deletions(-) > > diff --git a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c > b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c > index f02dcf5188ab..e46d5743ad24 100644 > --- a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c > +++ b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c > @@ -15,11 +15,19 @@ __attribute__((optimize("O2"))) > int lets_test_this(int); > static volatile __u64 array[1] = {test_value}; > > -static __always_inline void trigger_func(void) > +static noinline void trigger_func(void) > { > +#if defined(__x86_64__) || defined(__i386__) > /* Base address + offset + (index * scale) */ > - for (volatile int i = 0; i <= 0; i++) > - STAP_PROBE1(test, usdt1, array[i]); > + /* Force SIB addressing with inline assembly */ > + const __u64 *base; > + __u32 idx; > + /* binding base to %rdx and idx to %rax */ > + asm volatile("" : "=d"(base), "=a"(idx) : "0"(array), "1"((__u32)0) : > "memory"); > + STAP_PROBE1(test, usdt1, base[idx]);
hum, I still end up with stapsdt 0x0000002a NT_STAPSDT (SystemTap probe descriptors) Provider: test Name: usdt1 Location: 0x00000000007674c9, Base: 0x00000000035bc698, Semaphore: 0x0000000000000000 Arguments: 8@%rax disasm being: static noinline void trigger_func(void) { 76749f: 55 push %rbp 7674a0: 48 89 e5 mov %rsp,%rbp /* Base address + offset + (index * scale) */ /* Force SIB addressing with inline assembly */ const __u64 *base; __u32 idx; /* binding base to %rdx and idx to %rax */ asm volatile("" : "=d"(base), "=a"(idx) : "0"(array), "1"((__u32)0) : "memory"); 7674a3: ba 20 49 9c 03 mov $0x39c4920,%edx 7674a8: b8 00 00 00 00 mov $0x0,%eax 7674ad: 48 89 55 f8 mov %rdx,-0x8(%rbp) 7674b1: 89 45 f4 mov %eax,-0xc(%rbp) STAP_PROBE1(test, usdt1, base[idx]); 7674b4: 8b 45 f4 mov -0xc(%rbp),%eax 7674b7: 48 8d 14 c5 00 00 00 lea 0x0(,%rax,8),%rdx 7674be: 00 7674bf: 48 8b 45 f8 mov -0x8(%rbp),%rax 7674c3: 48 01 d0 add %rdx,%rax 7674c6: 48 8b 00 mov (%rax),%rax 7674c9: 90 nop #else STAP_PROBE1(test, usdt1, array[0]); #endif } 7674ca: 90 nop 7674cb: 5d pop %rbp 7674cc: c3 ret I wonder if we could also try to bring in Andrii's usdt.h [1] and overload usdt arguments like outlined in the hack below (full code in [2]). We will probably need a smarter and more sustainable change, but I guess you get the idea. jirka [1] https://github.com/anakryiko/usdt [2] git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git usdt_hack --- diff --git a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c index e46d5743ad24..7bb098c37de5 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c @@ -4,6 +4,8 @@ #include "../sdt.h" #include "test_usdt_o2.skel.h" +#define USDT_ARGS ".asciz \"(,%%rax,8)\"\n" +#include "usdt.h" #if defined(__GNUC__) && !defined(__clang__) __attribute__((optimize("O2"))) @@ -28,6 +30,7 @@ static noinline void trigger_func(void) #else STAP_PROBE1(test, usdt1, 
array[0]); #endif + USDT(krava, test1, 1, 2); } static void basic_sib_usdt(void) diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h index 549d1f774810..960ebd6aa88b 100644 --- a/tools/testing/selftests/bpf/usdt.h +++ b/tools/testing/selftests/bpf/usdt.h @@ -403,6 +403,10 @@ struct usdt_sema { volatile unsigned short active; }; __asm__ __volatile__ ("" :: "m" (sema)); #endif +#ifndef USDT_ARGS +#define USDT_ARGS __usdt_asm_args(__VA_ARGS__) +#endif + /* main USDT definition (nop and .note.stapsdt metadata) */ #define __usdt_probe(group, name, sema_def, sema, ...) do { \ sema_def(sema) \ @@ -418,7 +422,7 @@ struct usdt_sema { volatile unsigned short active; }; __usdt_asm1( __usdt_asm_addr sema) \ __usdt_asm_strz(group) \ __usdt_asm_strz(name) \ - __usdt_asm_args(__VA_ARGS__) \ + USDT_ARGS \ __usdt_asm1( .ascii "\0") \ __usdt_asm1(994: .balign 4) \ __usdt_asm1( .popsection) \