https://github.com/yonghong-song updated https://github.com/llvm/llvm-project/pull/206876
>From 5e59d1f039ce1ca4d5df9f0ce20e22129c8a8a4c Mon Sep 17 00:00:00 2001 From: Yonghong Song <[email protected]> Date: Tue, 30 Jun 2026 17:03:39 -0700 Subject: [PATCH] [BPF] Return small aggregates directly in registers Previously the BPF ABI always returned aggregate (struct/union) types indirectly through an sret pointer, regardless of size. This is inconsistent with how classifyArgumentType() already passes small aggregates: aggregate arguments up to 128 bits are coerced into one or two 64-bit registers, while only larger aggregates use an indirect reference. Make classifyReturnType() mirror that convention by factoring the shared aggregate handling into a classifyAggregateType() helper used by both: - empty aggregates (0 bits) are ignored; - aggregates up to 64 bits are returned directly, coerced to an integer whose width is the aggregate's size rounded up to a multiple of 8 bits; - aggregates of 65..128 bits are returned directly as [2 x i64]; - aggregates larger than 128 bits are returned indirectly via sret. This keeps each returned value within the backend's two-register return convention and avoids an unnecessary memory round-trip for small structs. This also aligns BPF with the general-purpose C ABIs of other targets: both x86-64 (System V, RAX:RDX) and AArch64 (AAPCS64, X0:X1) return aggregates up to 16 bytes in a pair of registers and only fall back to an indirect sret pointer for larger ones. 
--- clang/lib/CodeGen/Targets/BPF.cpp | 50 +++--- clang/test/CodeGen/bpf-struct-return-regs.c | 73 ++++++++ clang/test/CodeGen/bpf-struct-return.c | 70 ++++++++ llvm/test/CodeGen/BPF/aggr_ret_regs.ll | 189 ++++++++++++++++++++ 4 files changed, 359 insertions(+), 23 deletions(-) create mode 100644 clang/test/CodeGen/bpf-struct-return-regs.c create mode 100644 clang/test/CodeGen/bpf-struct-return.c create mode 100644 llvm/test/CodeGen/BPF/aggr_ret_regs.ll diff --git a/clang/lib/CodeGen/Targets/BPF.cpp b/clang/lib/CodeGen/Targets/BPF.cpp index 3a7af346f1132..d3318d76703f2 100644 --- a/clang/lib/CodeGen/Targets/BPF.cpp +++ b/clang/lib/CodeGen/Targets/BPF.cpp @@ -22,30 +22,35 @@ class BPFABIInfo : public DefaultABIInfo { public: BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + // Classify an aggregate (struct/union) used as an argument or a return + // value. Aggregates that fit in 1 or 2 registers are passed/returned + // directly, coerced to an integer or a pair of 64-bit integers; larger + // ones use an indirect reference. + ABIArgInfo classifyAggregateType(QualType Ty) const { + uint64_t Bits = getContext().getTypeSize(Ty); + if (Bits == 0) + return ABIArgInfo::getIgnore(); + + // Larger aggregates use an indirect reference. + if (Bits > 128) + return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace()); + + // If the aggregate needs 1 or 2 registers, do not use reference. 
+ llvm::Type *CoerceTy; + if (Bits <= 64) { + CoerceTy = llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); + } else { + llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), 64); + CoerceTy = llvm::ArrayType::get(RegTy, 2); + } + return ABIArgInfo::getDirect(CoerceTy); + } + ABIArgInfo classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); - if (isAggregateTypeForABI(Ty)) { - uint64_t Bits = getContext().getTypeSize(Ty); - if (Bits == 0) - return ABIArgInfo::getIgnore(); - - // If the aggregate needs 1 or 2 registers, do not use reference. - if (Bits <= 128) { - llvm::Type *CoerceTy; - if (Bits <= 64) { - CoerceTy = - llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); - } else { - llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), 64); - CoerceTy = llvm::ArrayType::get(RegTy, 2); - } - return ABIArgInfo::getDirect(CoerceTy); - } else { - return getNaturalAlignIndirect(Ty, - getDataLayout().getAllocaAddrSpace()); - } - } + if (isAggregateTypeForABI(Ty)) + return classifyAggregateType(Ty); if (const auto *ED = Ty->getAsEnumDecl()) Ty = ED->getIntegerType(); @@ -65,8 +70,7 @@ class BPFABIInfo : public DefaultABIInfo { return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy)) - return getNaturalAlignIndirect(RetTy, - getDataLayout().getAllocaAddrSpace()); + return classifyAggregateType(RetTy); // Treat an enum type as its underlying type. 
if (const auto *ED = RetTy->getAsEnumDecl()) diff --git a/clang/test/CodeGen/bpf-struct-return-regs.c b/clang/test/CodeGen/bpf-struct-return-regs.c new file mode 100644 index 0000000000000..b8891613b053b --- /dev/null +++ b/clang/test/CodeGen/bpf-struct-return-regs.c @@ -0,0 +1,73 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang_cc1 -triple bpf -O2 -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s +// +// Aggregates up to 16 bytes are returned directly in registers: coerced to an +// integer when they fit in one register (<= 8 bytes), or to [2 x i64] when +// they need two (9..16 bytes). + +struct foo1 {int a;}; // 4 bytes -> one register +struct foo2 {int a; long b;}; // 16 bytes -> two registers +struct foo3 {int a; int b; long c;}; // 16 bytes -> two registers +struct foo4 {int a; int b:20; int c:20; int d:24;}; // 16 bytes -> two registers + +#define __noinline __attribute__((noinline)) + +__noinline struct foo1 bar1(int a) { +// CHECK-LABEL: define dso_local i32 @bar1( +// CHECK: ret i32 + struct foo1 v = {a}; + return v; +} + +__noinline struct foo2 bar2(int a, int b) { +// CHECK-LABEL: define dso_local [2 x i64] @bar2( +// CHECK: ret [2 x i64] + struct foo2 v = {a, b}; + return v; +} + +__noinline struct foo3 bar3(int a, int b, int c) { +// CHECK-LABEL: define dso_local [2 x i64] @bar3( +// CHECK: ret [2 x i64] + struct foo3 v = {a, b, c}; + return v; +} + +__noinline struct foo4 bar4(int a, int b, int c, int d) { +// CHECK-LABEL: define dso_local [2 x i64] @bar4( +// CHECK: ret [2 x i64] + struct foo4 v = {a, b, c, d}; + return v; +} + +int check1(int a) { +// CHECK-LABEL: define dso_local i32 @check1( +// CHECK: %[[C1:.*]] = call i32 @bar1( +// CHECK: store i32 %[[C1]] + struct foo1 v1 = bar1(a); + return v1.a; +} + +int check2(int a, int b) { +// CHECK-LABEL: define dso_local i32 @check2( +// CHECK: %[[C2:.*]] = call [2 x i64] @bar2( +// CHECK: store [2 x i64] %[[C2]] + struct foo2 v1 = bar2(a, b); + return v1.a + v1.b; +} + +int check3(int a, 
int b, int c) { +// CHECK-LABEL: define dso_local i32 @check3( +// CHECK: %[[C3:.*]] = call [2 x i64] @bar3( +// CHECK: store [2 x i64] %[[C3]] + struct foo3 v1 = bar3(a, b, c); + return v1.a + v1.b + v1.c; +} + +int check4(int a, int b, int c, int d) { +// CHECK-LABEL: define dso_local i32 @check4( +// CHECK: %[[C4:.*]] = call [2 x i64] @bar4( +// CHECK: store [2 x i64] %[[C4]] + struct foo4 v1 = bar4(a, b, c, d); + return v1.a + v1.b + v1.c + v1.d; +} diff --git a/clang/test/CodeGen/bpf-struct-return.c b/clang/test/CodeGen/bpf-struct-return.c new file mode 100644 index 0000000000000..d7eab2afda172 --- /dev/null +++ b/clang/test/CodeGen/bpf-struct-return.c @@ -0,0 +1,70 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang_cc1 -triple bpf -O2 -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s + +struct t1 {}; +struct t2 { + int a; +}; +struct t3 { + int a; + long b; +}; +struct t4 { + long a; + long b; + long c; +}; +struct t5 { + char a; +}; +union u1 { + int a; + long b; +}; + +struct t1 foo1(void) { +// CHECK: define dso_local void @foo1() + struct t1 tmp = {}; + return tmp; +} + +struct t2 foo2(void) { +// CHECK: define dso_local i32 @foo2() + struct t2 tmp = {}; + return tmp; +} + +struct t3 foo3(void) { +// CHECK: define dso_local [2 x i64] @foo3() + struct t3 tmp = {}; + return tmp; +} + +struct t4 foo4(void) { +// CHECK: define dso_local void @foo4(ptr dead_on_unwind noalias writable sret(%struct.t4) align 8 %agg.result) + struct t4 tmp = {}; + return tmp; +} + +struct t5 foo5(void) { +// CHECK: define dso_local i8 @foo5() + struct t5 tmp = {}; + return tmp; +} + +union u1 foou(void) { +// CHECK: define dso_local i64 @foou() + union u1 tmp = {}; + return tmp; +} + +int bar(void) { +// CHECK-LABEL: define dso_local i32 @bar() +// CHECK: %[[C2:.*]] = call i32 @foo2() +// CHECK: store i32 %[[C2]] +// CHECK: %[[C3:.*]] = call [2 x i64] @foo3() +// CHECK: store [2 x i64] %[[C3]] + struct t2 a = foo2(); + struct t3 b = foo3(); + return a.a + b.a; +} diff 
--git a/llvm/test/CodeGen/BPF/aggr_ret_regs.ll b/llvm/test/CodeGen/BPF/aggr_ret_regs.ll new file mode 100644 index 0000000000000..8a7d165f0acb0 --- /dev/null +++ b/llvm/test/CodeGen/BPF/aggr_ret_regs.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=bpfel -mcpu=v1 | FileCheck %s +; +; Generated from clang/test/CodeGen/bpf-struct-return-regs.c to show the +; machine instructions for returning aggregates in registers: +; - foo1 (4 bytes) is coerced to i32 and returned in one register (r0); +; - foo2/foo3/foo4 (16 bytes) are coerced to [2 x i64] and returned in two +; registers (r0 and r2). + +target triple = "bpf" + +define dso_local noundef i32 @bar1(i32 noundef returned %0) local_unnamed_addr #0 { +; CHECK-LABEL: bar1: +; CHECK: .Lbar1$local: +; CHECK-NEXT: .type .Lbar1$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: exit + ret i32 %0 +} + +define dso_local [2 x i64] @bar2(i32 noundef %0, i32 noundef %1) local_unnamed_addr #0 { +; CHECK-LABEL: bar2: +; CHECK: .Lbar2$local: +; CHECK-NEXT: .type .Lbar2$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: r0 <<= 32 +; CHECK-NEXT: r0 >>= 32 +; CHECK-NEXT: r2 <<= 32 +; CHECK-NEXT: r2 s>>= 32 +; CHECK-NEXT: exit + %3 = sext i32 %1 to i64 + %4 = zext i32 %0 to i64 + %5 = insertvalue [2 x i64] poison, i64 %4, 0 + %6 = insertvalue [2 x i64] %5, i64 %3, 1 + ret [2 x i64] %6 +} + +define dso_local [2 x i64] @bar3(i32 noundef %0, i32 noundef %1, i32 noundef %2) local_unnamed_addr #0 { +; CHECK-LABEL: bar3: +; CHECK: .Lbar3$local: +; CHECK-NEXT: .type .Lbar3$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: r0 <<= 32 +; CHECK-NEXT: r1 <<= 32 +; CHECK-NEXT: r1 >>= 32 +; CHECK-NEXT: r0 |= r1 +; CHECK-NEXT: r3 <<= 32 +; CHECK-NEXT: r3 s>>= 32 +; CHECK-NEXT: r2 = r3 
+; CHECK-NEXT: exit + %4 = sext i32 %2 to i64 + %5 = zext i32 %1 to i64 + %6 = shl nuw i64 %5, 32 + %7 = zext i32 %0 to i64 + %8 = or disjoint i64 %6, %7 + %9 = insertvalue [2 x i64] poison, i64 %8, 0 + %10 = insertvalue [2 x i64] %9, i64 %4, 1 + ret [2 x i64] %10 +} + +define dso_local [2 x i64] @bar4(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #0 { +; CHECK-LABEL: bar4: +; CHECK: .Lbar4$local: +; CHECK-NEXT: .type .Lbar4$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: r1 <<= 32 +; CHECK-NEXT: r1 >>= 32 +; CHECK-NEXT: r0 &= 1048575 +; CHECK-NEXT: r0 <<= 32 +; CHECK-NEXT: r0 |= r1 +; CHECK-NEXT: r3 &= 1048575 +; CHECK-NEXT: r4 &= 16777215 +; CHECK-NEXT: r4 <<= 32 +; CHECK-NEXT: r4 |= r3 +; CHECK-NEXT: r2 = r4 +; CHECK-NEXT: exit + %5 = and i32 %1, 1048575 + %6 = and i32 %2, 1048575 + %7 = and i32 %3, 16777215 + %8 = zext nneg i32 %5 to i64 + %9 = shl nuw nsw i64 %8, 32 + %10 = zext i32 %0 to i64 + %11 = or disjoint i64 %9, %10 + %12 = insertvalue [2 x i64] poison, i64 %11, 0 + %13 = zext nneg i32 %7 to i64 + %14 = shl nuw nsw i64 %13, 32 + %15 = zext nneg i32 %6 to i64 + %16 = or disjoint i64 %14, %15 + %17 = insertvalue [2 x i64] %12, i64 %16, 1 + ret [2 x i64] %17 +} + +define dso_local noundef i32 @check1(i32 noundef returned %0) local_unnamed_addr #1 { +; CHECK-LABEL: check1: +; CHECK: .Lcheck1$local: +; CHECK-NEXT: .type .Lcheck1$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: exit + ret i32 %0 +} + +define dso_local i32 @check2(i32 noundef %0, i32 noundef %1) local_unnamed_addr #1 { +; CHECK-LABEL: check2: +; CHECK: .Lcheck2$local: +; CHECK-NEXT: .type .Lcheck2$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call bar2 +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit + %3 = tail call [2 x i64] @bar2(i32 noundef %0, i32 noundef %1) + %4 = extractvalue [2 x i64] %3, 0 + %5 = 
extractvalue [2 x i64] %3, 1 + %6 = add i64 %4, %5 + %7 = trunc i64 %6 to i32 + ret i32 %7 +} + +define dso_local i32 @check3(i32 noundef %0, i32 noundef %1, i32 noundef %2) local_unnamed_addr #1 { +; CHECK-LABEL: check3: +; CHECK: .Lcheck3$local: +; CHECK-NEXT: .type .Lcheck3$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call bar3 +; CHECK-NEXT: r1 = 4294967297 ll +; CHECK-NEXT: r0 *= r1 +; CHECK-NEXT: r0 >>= 32 +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit + %4 = tail call [2 x i64] @bar3(i32 noundef %0, i32 noundef %1, i32 noundef %2) + %5 = extractvalue [2 x i64] %4, 0 + %6 = extractvalue [2 x i64] %4, 1 + %7 = mul i64 %5, 4294967297 + %8 = lshr i64 %7, 32 + %9 = add i64 %8, %6 + %10 = trunc i64 %9 to i32 + ret i32 %10 +} + +define dso_local i32 @check4(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #1 { +; CHECK-LABEL: check4: +; CHECK: .Lcheck4$local: +; CHECK-NEXT: .type .Lcheck4$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call bar4 +; CHECK-NEXT: r1 = r0 +; CHECK-NEXT: r1 <<= 12 +; CHECK-NEXT: r1 s>>= 44 +; CHECK-NEXT: r1 += r0 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 <<= 44 +; CHECK-NEXT: r3 s>>= 44 +; CHECK-NEXT: r1 += r3 +; CHECK-NEXT: r2 <<= 8 +; CHECK-NEXT: r2 s>>= 40 +; CHECK-NEXT: r1 += r2 +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: exit + %5 = tail call [2 x i64] @bar4(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) + %6 = extractvalue [2 x i64] %5, 0 + %7 = extractvalue [2 x i64] %5, 1 + %8 = trunc i64 %6 to i32 + %9 = lshr i64 %6, 20 + %10 = trunc i64 %9 to i32 + %11 = ashr i32 %10, 12 + %12 = add nsw i32 %11, %8 + %13 = trunc i64 %7 to i32 + %14 = shl i32 %13, 12 + %15 = ashr exact i32 %14, 12 + %16 = add nsw i32 %12, %15 + %17 = lshr i64 %7, 24 + %18 = trunc i64 %17 to i32 + %19 = ashr i32 %18, 8 + %20 = add nsw i32 %16, %19 + ret i32 %20 +} + _______________________________________________ cfe-commits mailing list [email 
protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
