Hi Jiri, sorry for my oversight: the conditionally applied optimization
attribute didn't work properly.

Could you try replacing `__attribute__((optimize("O2")))` with
`#pragma GCC optimize("O2")` in usdt_o2.c and verify it one more time?
I think it will help. In fact, the usdt1 argument spec generated with
`__attribute__((optimize("O2")))` on my machine was `8@%rax`, not
`8@(%rdx,%rax,8)`.

For more detail:

> - #if defined(__GNUC__) && !defined(__clang__)
> - __attribute__((optimize("O2")))
>
> +#if (defined(__GNUC__) && !defined(__clang__))
> +#pragma GCC optimize("O2")
> +#else
> +#pragma message("non-gcc compiler: the correct probes might not be installed")
> +#endif

Thanks


At 2025-08-15 20:44:32, "Jiri Olsa" <olsaj...@gmail.com> wrote:
>On Thu, Aug 14, 2025 at 04:07:39PM +0000, Jiawei Zhao wrote:
>> usdt_o2 is intended to exercise the SIB (Scale-Index-Base) argument
>> handling in libbpf's USDT path. With GCC 13 this reliably produced a
>> SIB-form argument (e.g. 8@(%rdx,%rax,8)), but with newer GCC (e.g. 15)
>> the compiler frequently optimizes the probe argument into a plain
>> register (e.g. 8@%rax) or a stack slot, so the test stops covering the
>> SIB code path and becomes flaky across toolchains.
>> 
>> Force a SIB memory operand in the probe by:
>> * placing the base pointer into %rdx and the index into %rax using an
>>   empty inline asm with output constraints ("=d", "=a") and matching
>>   inputs
>> * immediately passing base[idx] to STAP_PROBE1.
>> * only enable on x86 platform.
>> 
>> This makes the compiler encode the operand as SIB (base + index8),
>> which in .note.stapsdt shows up as 8@(%rdx,%rax,8) regardless of GCC
>> version. A memory clobber and noinline prevent reordering/re-allocation
>> around the probe site.
>> 
>> This change is x86_64-specific and does not alter program semantics; it
>> only stabilizes the USDT argument shape so the test consistently
>> validates SIB handling. Clang historically prefers stack temporaries for
>> such operands, but the selftests build with GCC, and this keeps behavior
>> stable across GCC versions without introducing a separate .S file.
>> 
>> Signed-off-by: Jiawei Zhao <phoenix500...@163.com>
>> ---
>>  .../selftests/bpf/prog_tests/usdt_o2.c        | 20 ++++++++++++++-----
>>  1 file changed, 15 insertions(+), 5 deletions(-)
>> 
>> diff --git a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
>> index f02dcf5188ab..e46d5743ad24 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
>> @@ -15,11 +15,19 @@ __attribute__((optimize("O2")))
>>  int lets_test_this(int);
>>  static volatile __u64 array[1] = {test_value};
>>  
>> -static __always_inline void trigger_func(void)
>> +static noinline void trigger_func(void)
>>  {
>> +#if defined(__x86_64__) || defined(__i386__)
>>      /* Base address + offset + (index * scale) */
>> -    for (volatile int i = 0; i <= 0; i++)
>> -            STAP_PROBE1(test, usdt1, array[i]);
>> +    /* Force SIB addressing with inline assembly */
>> +    const __u64 *base;
>> +    __u32 idx;
>> +    /* binding base to %rdx and idx to %rax */
>> +    asm volatile("" : "=d"(base), "=a"(idx) : "0"(array), "1"((__u32)0) : "memory");
>> +    STAP_PROBE1(test, usdt1, base[idx]);
>
>hum, I still end up with
>
>         stapsdt              0x0000002a       NT_STAPSDT (SystemTap probe descriptors)
>           Provider: test
>           Name: usdt1
>           Location: 0x00000000007674c9, Base: 0x00000000035bc698, Semaphore: 0x0000000000000000
>           Arguments: 8@%rax
>
>disasm being:
>
>       static noinline void trigger_func(void)
>       {
>         76749f:       55                      push   %rbp
>         7674a0:       48 89 e5                mov    %rsp,%rbp
>               /* Base address + offset + (index * scale) */
>               /* Force SIB addressing with inline assembly */
>               const __u64 *base;
>               __u32 idx;
>               /* binding base to %rdx and idx to %rax */
>               asm volatile("" : "=d"(base), "=a"(idx) : "0"(array), "1"((__u32)0) : "memory");
>         7674a3:       ba 20 49 9c 03          mov    $0x39c4920,%edx
>         7674a8:       b8 00 00 00 00          mov    $0x0,%eax
>         7674ad:       48 89 55 f8             mov    %rdx,-0x8(%rbp)
>         7674b1:       89 45 f4                mov    %eax,-0xc(%rbp)
>               STAP_PROBE1(test, usdt1, base[idx]);
>         7674b4:       8b 45 f4                mov    -0xc(%rbp),%eax
>         7674b7:       48 8d 14 c5 00 00 00    lea    0x0(,%rax,8),%rdx
>         7674be:       00
>         7674bf:       48 8b 45 f8             mov    -0x8(%rbp),%rax
>         7674c3:       48 01 d0                add    %rdx,%rax
>         7674c6:       48 8b 00                mov    (%rax),%rax
>         7674c9:       90                      nop
>       #else
>               STAP_PROBE1(test, usdt1, array[0]);
>       #endif
>       }
>         7674ca:       90                      nop
>         7674cb:       5d                      pop    %rbp
>         7674cc:       c3                      ret
>
>
>I wonder if we could also try to bring in Andrii's usdt.h [1] and overload usdt
>arguments as outlined in the hack below (full code in [2])
>
>we will probably need a smarter and more sustainable change, but I guess you
>get the idea
>
>jirka
>
>
>[1] https://github.com/anakryiko/usdt
>[2] git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git usdt_hack
>---
>diff --git a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
>index e46d5743ad24..7bb098c37de5 100644
>--- a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
>+++ b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
>@@ -4,6 +4,8 @@
> 
> #include "../sdt.h"
> #include "test_usdt_o2.skel.h"
>+#define USDT_ARGS ".asciz \"(,%%rax,8)\"\n"
>+#include "usdt.h"
> 
> #if defined(__GNUC__) && !defined(__clang__)
> __attribute__((optimize("O2")))
>@@ -28,6 +30,7 @@ static noinline void trigger_func(void)
> #else
>       STAP_PROBE1(test, usdt1, array[0]);
> #endif
>+      USDT(krava, test1, 1, 2);
> }
> 
> static void basic_sib_usdt(void)
>diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h
>index 549d1f774810..960ebd6aa88b 100644
>--- a/tools/testing/selftests/bpf/usdt.h
>+++ b/tools/testing/selftests/bpf/usdt.h
>@@ -403,6 +403,10 @@ struct usdt_sema { volatile unsigned short active; };
>       __asm__ __volatile__ ("" :: "m" (sema));
> #endif
> 
>+#ifndef USDT_ARGS
>+#define USDT_ARGS __usdt_asm_args(__VA_ARGS__)
>+#endif
>+
> /* main USDT definition (nop and .note.stapsdt metadata) */
> #define __usdt_probe(group, name, sema_def, sema, ...) do {                   
>                 \
>       sema_def(sema)                                                          
>                 \
>@@ -418,7 +422,7 @@ struct usdt_sema { volatile unsigned short active; };
>       __usdt_asm1(            __usdt_asm_addr sema)                           
>                 \
>       __usdt_asm_strz(group)                                                  
>                 \
>       __usdt_asm_strz(name)                                                   
>                 \
>-      __usdt_asm_args(__VA_ARGS__)                                            
>                \
>+      USDT_ARGS                                                               
>                \
>       __usdt_asm1(            .ascii "\0")                                    
>                 \
>       __usdt_asm1(994:        .balign 4)                                      
>                 \
>       __usdt_asm1(            .popsection)                                    
>                 \

Reply via email to