https://github.com/E00N777 updated https://github.com/llvm/llvm-project/pull/185852
>From 8ddab65655129d825c4a5465cd3613ed979bc04f Mon Sep 17 00:00:00 2001 From: E0N777 <[email protected]> Date: Wed, 11 Mar 2026 18:20:45 +0800 Subject: [PATCH 1/3] [CIR][AArch64] Lower BF16 vduph lane builtins --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 12 ++++++-- clang/test/CodeGen/AArch64/neon/bf16-vduph.c | 30 +++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/neon/bf16-vduph.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 5534e69b5f8bc..564d3e47a8c24 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2802,10 +2802,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vqdmlsls_lane_s32: case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: case NEON::BI__builtin_neon_vget_lane_bf16: - case NEON::BI__builtin_neon_vduph_lane_bf16: case NEON::BI__builtin_neon_vduph_lane_f16: case NEON::BI__builtin_neon_vgetq_lane_bf16: - case NEON::BI__builtin_neon_vduph_laneq_bf16: case NEON::BI__builtin_neon_vduph_laneq_f16: case NEON::BI__builtin_neon_vcvt_bf16_f32: case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: @@ -2824,6 +2822,16 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, return mlir::Value{}; } + switch (builtinID) { + default: + break; + case NEON::BI__builtin_neon_vduph_lane_bf16: + case NEON::BI__builtin_neon_vduph_laneq_bf16: { + uint64_t index = getZExtIntValueFromConstOp(ops[1]); + return builder.createExtractElement(loc, ops[0], index); + } + } + cir::VectorType ty = getNeonType(this, type, loc); if (!ty) return nullptr; diff --git a/clang/test/CodeGen/AArch64/neon/bf16-vduph.c b/clang/test/CodeGen/AArch64/neon/bf16-vduph.c new file mode 100644 index 0000000000000..e38383f567d98 --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/bf16-vduph.c @@ -0,0 +1,30 @@ +// REQUIRES: aarch64-registered-target || arm-registered-target + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM %} +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=CIR %} + +typedef __bf16 bfloat16_t; +typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t; +typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t; + +// LLVM-LABEL: @test_vduph_lane_bf16( +// LLVM-SAME: <4 x bfloat> {{.*}} [[V:%.*]]) +// LLVM: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V]], i{{32|64}} 1 +// LLVM: ret bfloat [[VGET_LANE]] +// CIR-LABEL: @test_vduph_lane_bf16( +// CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<4 x !cir.bf16> +bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { + return __builtin_bit_cast(bfloat16_t, __builtin_neon_vduph_lane_bf16(v, 1)); +} + +// LLVM-LABEL: @test_vduph_laneq_bf16( +// LLVM-SAME: <8 x bfloat> {{.*}} [[V:%.*]]) +// LLVM: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V]], i{{32|64}} 7 +// LLVM: ret bfloat [[VGETQ_LANE]] +// CIR-LABEL: @test_vduph_laneq_bf16( +// CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<8 x !cir.bf16> +bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { + return __builtin_bit_cast(bfloat16_t, + __builtin_neon_vduph_laneq_bf16(v, 7)); +} >From 0ab4d48e336ff3c3b82a0341f04d20b7eca7b5e9 Mon Sep 17 00:00:00 2001 From: E0N777 <[email protected]> Date: Thu, 12 Mar 2026 10:44:15 +0800 Subject: [PATCH 2/3] Address review comments on AArch64 lane builtins --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 27 +++++++------ clang/test/CodeGen/AArch64/neon/bf16-vduph.c | 38 +++++++++---------- 2 files changed, 30 insertions(+), 35 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 564d3e47a8c24..82d952d5066ba 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2724,6 +2724,10 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vset_lane_mf8: case NEON::BI__builtin_neon_vsetq_lane_mf8: case NEON::BI__builtin_neon_vsetq_lane_f64: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return mlir::Value{}; case NEON::BI__builtin_neon_vget_lane_i8: case NEON::BI__builtin_neon_vdupb_lane_i8: case NEON::BI__builtin_neon_vgetq_lane_i8: @@ -2752,6 +2756,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vdups_laneq_f32: case NEON::BI__builtin_neon_vgetq_lane_f64: case NEON::BI__builtin_neon_vdupd_laneq_f64: + case NEON::BI__builtin_neon_vget_lane_bf16: + case NEON::BI__builtin_neon_vduph_lane_bf16: + case NEON::BI__builtin_neon_vduph_lane_f16: + case NEON::BI__builtin_neon_vgetq_lane_bf16: + case NEON::BI__builtin_neon_vduph_laneq_bf16: + case NEON::BI__builtin_neon_vduph_laneq_f16: { + uint64_t index = getZExtIntValueFromConstOp(ops[1]); + return builder.createExtractElement(loc, ops[0], index); + } case NEON::BI__builtin_neon_vaddh_f16: case NEON::BI__builtin_neon_vsubh_f16: case NEON::BI__builtin_neon_vmulh_f16: @@ -2801,10 +2814,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vqdmlals_laneq_s32: case NEON::BI__builtin_neon_vqdmlsls_lane_s32: case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: - case NEON::BI__builtin_neon_vget_lane_bf16: - case NEON::BI__builtin_neon_vduph_lane_f16: - case NEON::BI__builtin_neon_vgetq_lane_bf16: - case NEON::BI__builtin_neon_vduph_laneq_f16: case NEON::BI__builtin_neon_vcvt_bf16_f32: case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: @@ -2822,16 +2831,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, return mlir::Value{}; } - switch (builtinID) { - default: - break; - case NEON::BI__builtin_neon_vduph_lane_bf16: - case NEON::BI__builtin_neon_vduph_laneq_bf16: { - uint64_t index = getZExtIntValueFromConstOp(ops[1]); - return builder.createExtractElement(loc, ops[0], index); - } - } - cir::VectorType ty = getNeonType(this, type, loc); if (!ty) return nullptr; diff --git a/clang/test/CodeGen/AArch64/neon/bf16-vduph.c b/clang/test/CodeGen/AArch64/neon/bf16-vduph.c index e38383f567d98..3196bf4c643b2 100644 --- a/clang/test/CodeGen/AArch64/neon/bf16-vduph.c +++ b/clang/test/CodeGen/AArch64/neon/bf16-vduph.c @@ -4,27 +4,23 @@ // RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM %} // RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=CIR %} -typedef __bf16 bfloat16_t; -typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t; -typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t; +#include <arm_neon.h> // LLVM-LABEL: @test_vduph_lane_bf16( -// LLVM-SAME: <4 x bfloat> {{.*}} [[V:%.*]]) -// LLVM: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V]], i{{32|64}} 1 -// LLVM: ret bfloat [[VGET_LANE]] // CIR-LABEL: @test_vduph_lane_bf16( -// CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<4 x !cir.bf16> -bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { - return __builtin_bit_cast(bfloat16_t, __builtin_neon_vduph_lane_bf16(v, 1)); -} - -// LLVM-LABEL: @test_vduph_laneq_bf16( -// LLVM-SAME: <8 x bfloat> {{.*}} [[V:%.*]]) -// LLVM: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V]], i{{32|64}} 7 -// LLVM: ret bfloat [[VGETQ_LANE]] -// CIR-LABEL: @test_vduph_laneq_bf16( -// CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<8 x !cir.bf16> -bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { - return __builtin_bit_cast(bfloat16_t, - __builtin_neon_vduph_laneq_bf16(v, 7)); -} + bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { + // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<4 x !cir.bf16> + // LLVM: %{{.*}} = extractelement <4 x bfloat> %{{.*}}, i{{32|64}} 1 + // LLVM: ret bfloat %{{.*}} + return vduph_lane_bf16(v, 1); + } + + // LLVM-LABEL: @test_vduph_laneq_bf16( + // CIR-LABEL: @test_vduph_laneq_bf16( + bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { + // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<8 x !cir.bf16> + // LLVM: %{{.*}} = extractelement <8 x bfloat> %{{.*}}, i{{32|64}} 7 + // LLVM: ret bfloat %{{.*}} + return vduph_laneq_bf16(v, 7); + } + \ No newline at end of file >From 0b4a1033bbcec6ae4c54d8082a3f7517bfc92dbb Mon Sep 17 00:00:00 2001 From: E0N777 <[email protected]> Date: Fri, 13 Mar 2026 11:53:26 +0800 Subject: [PATCH 3/3] [ClangIR][AArch64] Lower BF16 vduph lane builtins Lower `__builtin_neon_vduph_lane_bf16` and `__builtin_neon_vduph_laneq_bf16` to `cir.vec.extract`. Also migrate the corresponding ACLE tests from `clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c` to `clang/test/CodeGen/AArch64/neon/bf16-getset.c`. --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 31 +++++++++++-------- .../CodeGen/AArch64/bf16-getset-intrinsics.c | 18 ----------- .../neon/{bf16-vduph.c => bf16-getset.c} | 31 +++++++++---------- 3 files changed, 33 insertions(+), 47 deletions(-) rename clang/test/CodeGen/AArch64/neon/{bf16-vduph.c => bf16-getset.c} (59%) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 82d952d5066ba..8aa6e368b901c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2724,10 +2724,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vset_lane_mf8: case NEON::BI__builtin_neon_vsetq_lane_mf8: case NEON::BI__builtin_neon_vsetq_lane_f64: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AArch64 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return mlir::Value{}; case NEON::BI__builtin_neon_vget_lane_i8: case NEON::BI__builtin_neon_vdupb_lane_i8: case NEON::BI__builtin_neon_vgetq_lane_i8: @@ -2756,15 +2752,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vdups_laneq_f32: case NEON::BI__builtin_neon_vgetq_lane_f64: case NEON::BI__builtin_neon_vdupd_laneq_f64: - case NEON::BI__builtin_neon_vget_lane_bf16: - case NEON::BI__builtin_neon_vduph_lane_bf16: - case NEON::BI__builtin_neon_vduph_lane_f16: - case NEON::BI__builtin_neon_vgetq_lane_bf16: - case NEON::BI__builtin_neon_vduph_laneq_bf16: - case NEON::BI__builtin_neon_vduph_laneq_f16: { - uint64_t index = getZExtIntValueFromConstOp(ops[1]); - return builder.createExtractElement(loc, ops[0], index); - } case NEON::BI__builtin_neon_vaddh_f16: case NEON::BI__builtin_neon_vsubh_f16: case NEON::BI__builtin_neon_vmulh_f16: @@ -2814,6 +2801,24 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vqdmlals_laneq_s32: case NEON::BI__builtin_neon_vqdmlsls_lane_s32: case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: + case NEON::BI__builtin_neon_vget_lane_bf16: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return mlir::Value{}; + case NEON::BI__builtin_neon_vduph_lane_bf16: { + return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]); + } + case NEON::BI__builtin_neon_vduph_lane_f16: + case NEON::BI__builtin_neon_vgetq_lane_bf16: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return mlir::Value{}; + case NEON::BI__builtin_neon_vduph_laneq_bf16: { + return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]); + } + case NEON::BI__builtin_neon_vduph_laneq_f16: case NEON::BI__builtin_neon_vcvt_bf16_f32: case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: diff --git a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c index 6ce6e37137cd0..55eb5210829d2 100644 --- a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c +++ b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c @@ -152,21 +152,3 @@ bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) { bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) { return vsetq_lane_bf16(a, v, 7); } - -// CHECK-LABEL: @test_vduph_lane_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1 -// CHECK-NEXT: ret bfloat [[VGET_LANE]] -// -bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { - return vduph_lane_bf16(v, 1); -} - -// CHECK-LABEL: @test_vduph_laneq_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7 -// CHECK-NEXT: ret bfloat [[VGETQ_LANE]] -// -bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { - return vduph_laneq_bf16(v, 7); -} diff --git a/clang/test/CodeGen/AArch64/neon/bf16-vduph.c b/clang/test/CodeGen/AArch64/neon/bf16-getset.c similarity index 59% rename from clang/test/CodeGen/AArch64/neon/bf16-vduph.c rename to clang/test/CodeGen/AArch64/neon/bf16-getset.c index 3196bf4c643b2..77fc41b6c8ab1 100644 --- a/clang/test/CodeGen/AArch64/neon/bf16-vduph.c +++ b/clang/test/CodeGen/AArch64/neon/bf16-getset.c @@ -8,19 +8,18 @@ // LLVM-LABEL: @test_vduph_lane_bf16( // CIR-LABEL: @test_vduph_lane_bf16( - bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { - // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<4 x !cir.bf16> - // LLVM: %{{.*}} = extractelement <4 x bfloat> %{{.*}}, i{{32|64}} 1 - // LLVM: ret bfloat %{{.*}} - return vduph_lane_bf16(v, 1); - } - - // LLVM-LABEL: @test_vduph_laneq_bf16( - // CIR-LABEL: @test_vduph_laneq_bf16( - bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { - // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<8 x !cir.bf16> - // LLVM: %{{.*}} = extractelement <8 x bfloat> %{{.*}}, i{{32|64}} 7 - // LLVM: ret bfloat %{{.*}} - return vduph_laneq_bf16(v, 7); - } - \ No newline at end of file +bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { + // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !s32i] : !cir.vector<4 x !cir.bf16> + // LLVM: %{{.*}} = extractelement <4 x bfloat> %{{.*}}, i{{32|64}} 1 + // LLVM: ret bfloat %{{.*}} + return vduph_lane_bf16(v, 1); +} + +// LLVM-LABEL: @test_vduph_laneq_bf16( +// CIR-LABEL: @test_vduph_laneq_bf16( +bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { + // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !s32i] : !cir.vector<8 x !cir.bf16> + // LLVM: %{{.*}} = extractelement <8 x bfloat> %{{.*}}, i{{32|64}} 7 + // LLVM: ret bfloat %{{.*}} + return vduph_laneq_bf16(v, 7); +} \ No newline at end of file _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
