[clang] 04797bc - [CIR][AArch64] Lower BF16 vduph lane builtins (#185852)

via cfe-commits Mon, 16 Mar 2026 09:50:55 -0700

Author: Jiahao Guo
Date: 2026-03-16T16:50:41Z
New Revision: 04797bc69268d4c975c18a499d1be66f470c91ba


URL: 
https://github.com/llvm/llvm-project/commit/04797bc69268d4c975c18a499d1be66f470c91ba
DIFF: 
https://github.com/llvm/llvm-project/commit/04797bc69268d4c975c18a499d1be66f470c91ba.diff

LOG: [CIR][AArch64] Lower BF16 vduph lane builtins (#185852)

Part of #185382.

Lower `__builtin_neon_vduph_lane_bf16` and
`__builtin_neon_vduph_laneq_bf16` in ClangIR to `cir.vec.extract`,
and add dedicated AArch64 Neon BF16 tests.

This is my first LLVM PR, so I'd really appreciate any suggestions on
the implementation, test structure, or general LLVM contribution style.

Added: 
    clang/test/CodeGen/AArch64/neon/bf16-getset.c

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
    clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 5534e69b5f8bc..8aa6e368b901c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2802,10 +2802,22 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32:
   case NEON::BI__builtin_neon_vget_lane_bf16:
-  case NEON::BI__builtin_neon_vduph_lane_bf16:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
+  case NEON::BI__builtin_neon_vduph_lane_bf16: {
+    return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]);
+  }
   case NEON::BI__builtin_neon_vduph_lane_f16:
   case NEON::BI__builtin_neon_vgetq_lane_bf16:
-  case NEON::BI__builtin_neon_vduph_laneq_bf16:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
+  case NEON::BI__builtin_neon_vduph_laneq_bf16: {
+    return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]);
+  }
   case NEON::BI__builtin_neon_vduph_laneq_f16:
   case NEON::BI__builtin_neon_vcvt_bf16_f32:
   case NEON::BI__builtin_neon_vcvtq_low_bf16_f32:

diff  --git a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c 
b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
index 6ce6e37137cd0..55eb5210829d2 100644
--- a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
@@ -152,21 +152,3 @@ bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, 
bfloat16x4_t v) {
 bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) {
   return vsetq_lane_bf16(a, v, 7);
 }
-
-// CHECK-LABEL: @test_vduph_lane_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], 
i32 1
-// CHECK-NEXT:    ret bfloat [[VGET_LANE]]
-//
-bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) {
-  return vduph_lane_bf16(v, 1);
-}
-
-// CHECK-LABEL: @test_vduph_laneq_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], 
i32 7
-// CHECK-NEXT:    ret bfloat [[VGETQ_LANE]]
-//
-bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) {
-  return vduph_laneq_bf16(v, 7);
-}

diff  --git a/clang/test/CodeGen/AArch64/neon/bf16-getset.c 
b/clang/test/CodeGen/AArch64/neon/bf16-getset.c
new file mode 100644
index 0000000000000..759b2a5af7121
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/neon/bf16-getset.c
@@ -0,0 +1,28 @@
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// RUN:                   %clang_cc1 -triple arm64-none-linux-gnu 
-target-feature +neon -target-feature +bf16 -disable-O0-optnone 
-flax-vector-conversions=none           -emit-llvm -o - %s | opt -S 
-passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM
+// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu 
-target-feature +neon -target-feature +bf16 -disable-O0-optnone 
-flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S 
-passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM %}
+// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu 
-target-feature +neon -target-feature +bf16 -disable-O0-optnone 
-flax-vector-conversions=none -fclangir -emit-cir  -o - %s |                    
           FileCheck %s --check-prefixes=ALL,CIR %}
+
+#include <arm_neon.h>
+
+//===------------------------------------------------------===//
+// BF16 get/set lane
+//===------------------------------------------------------===//
+// TODO: Add the remaining intrinsics from this group.
+
+// ALL-LABEL: @test_vduph_lane_bf16(
+bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) {
+  // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !s32i] : !cir.vector<4 x !cir.bf16>
+  // LLVM: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> %{{.*}}, i32 1
+  // LLVM: ret bfloat [[VGET_LANE]]
+  return vduph_lane_bf16(v, 1);
+}
+
+// ALL-LABEL: @test_vduph_laneq_bf16(
+bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) {
+  // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !s32i] : !cir.vector<8 x !cir.bf16>
+  // LLVM: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> %{{.*}}, i32 7
+  // LLVM: ret bfloat [[VGETQ_LANE]]
+  return vduph_laneq_bf16(v, 7);
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 04797bc - [CIR][AArch64] Lower BF16 vduph lane builtins (#185852)

Reply via email to