[clang] [CIR][AArch64] Upstream vset_lane_*/vsetq_lane_* NEON builtins (PR #186623)

via cfe-commits Sat, 14 Mar 2026 13:35:30 -0700

https://github.com/abhijeetsharma200 created 
https://github.com/llvm/llvm-project/pull/186623


Part of #185382


>From f5ceed9e648bfa29fc89f1cedb518bcf9fae9e3d Mon Sep 17 00:00:00 2001
From: Abhijeet Sharma <[email protected]>
Date: Sat, 14 Mar 2026 19:59:12 +0100
Subject: [PATCH] [CIR][AArch64] Upstream vset_lane_*/vsetq_lane_* NEON
 builtins, Part of #185382

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  17 ++
 .../CodeGen/AArch64/bf16-getset-intrinsics.c  |  17 --
 clang/test/CodeGen/AArch64/neon-vget.c        | 189 -------------
 clang/test/CodeGen/AArch64/neon/neon-vset.c   | 259 ++++++++++++++++++
 4 files changed, 276 insertions(+), 206 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/neon/neon-vset.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 5534e69b5f8bc..87d2c228d6ebe 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2708,6 +2708,10 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   }
   case NEON::BI__builtin_neon_vtstd_s64:
   case NEON::BI__builtin_neon_vtstd_u64:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vset_lane_i8:
   case NEON::BI__builtin_neon_vset_lane_i16:
   case NEON::BI__builtin_neon_vset_lane_i32:
@@ -2720,10 +2724,23 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vsetq_lane_i64:
   case NEON::BI__builtin_neon_vsetq_lane_bf16:
   case NEON::BI__builtin_neon_vsetq_lane_f32:
+    return cir::VecInsertOp::create(builder, loc, ops[1], ops[0], ops[2]);
   case NEON::BI__builtin_neon_vset_lane_f64:
+    // The vector type needs a cast for the v1f64 variant.
+    ops[1] = builder.createBitcast(ops[1],
+                                   cir::VectorType::get(builder.getDoubleTy(), 1));
+    return cir::VecInsertOp::create(builder, loc, ops[1], ops[0], ops[2]);
   case NEON::BI__builtin_neon_vset_lane_mf8:
   case NEON::BI__builtin_neon_vsetq_lane_mf8:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vsetq_lane_f64:
+    // The vector type needs a cast for the v2f64 variant.
+    ops[1] = builder.createBitcast(ops[1],
+                                   cir::VectorType::get(builder.getDoubleTy(), 2));
+    return cir::VecInsertOp::create(builder, loc, ops[1], ops[0], ops[2]);
   case NEON::BI__builtin_neon_vget_lane_i8:
   case NEON::BI__builtin_neon_vdupb_lane_i8:
   case NEON::BI__builtin_neon_vgetq_lane_i8:
diff --git a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
index 6ce6e37137cd0..9cec8a2b79d22 100644
--- a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
@@ -135,23 +135,6 @@ bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) {
   return vgetq_lane_bf16(v, 7);
 }
 
-// CHECK-LABEL: @test_vset_lane_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[V:%.*]], 
bfloat [[A:%.*]], i32 1
-// CHECK-NEXT:    ret <4 x bfloat> [[VSET_LANE]]
-//
-bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) {
-  return vset_lane_bf16(a, v, 1);
-}
-
-// CHECK-LABEL: @test_vsetq_lane_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[V:%.*]], 
bfloat [[A:%.*]], i32 7
-// CHECK-NEXT:    ret <8 x bfloat> [[VSET_LANE]]
-//
-bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) {
-  return vsetq_lane_bf16(a, v, 7);
-}
 
 // CHECK-LABEL: @test_vduph_lane_bf16(
 // CHECK-NEXT:  entry:
diff --git a/clang/test/CodeGen/AArch64/neon-vget.c b/clang/test/CodeGen/AArch64/neon-vget.c
index ebc8c2f9228ed..29ea47c12c51b 100644
--- a/clang/test/CodeGen/AArch64/neon-vget.c
+++ b/clang/test/CodeGen/AArch64/neon-vget.c
@@ -187,192 +187,3 @@ int64_t test_vgetq_lane_s64(int64x2_t a) {
 uint64_t test_vgetq_lane_u64(uint64x2_t a) {
   return vgetq_lane_u64(a, 1);
 }
-
-
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vset_lane_u8(i8 noundef %a, <8 x 
i8> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
-// CHECK:   ret <8 x i8> [[VSET_LANE]]
-uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
-  return vset_lane_u8(a, b, 7);
-}
-
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vset_lane_u16(i16 noundef %a, <4 
x i16> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
-// CHECK:   ret <4 x i16> [[VSET_LANE]]
-uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
-  return vset_lane_u16(a, b, 3);
-}
-
-// CHECK-LABEL: define{{.*}} <2 x i32> @test_vset_lane_u32(i32 noundef %a, <2 
x i32> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
-// CHECK:   ret <2 x i32> [[VSET_LANE]]
-uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
-  return vset_lane_u32(a, b, 1);
-}
-
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vset_lane_s8(i8 noundef %a, <8 x 
i8> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
-// CHECK:   ret <8 x i8> [[VSET_LANE]]
-int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
-  return vset_lane_s8(a, b, 7);
-}
-
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vset_lane_s16(i16 noundef %a, <4 
x i16> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
-// CHECK:   ret <4 x i16> [[VSET_LANE]]
-int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
-  return vset_lane_s16(a, b, 3);
-}
-
-// CHECK-LABEL: define{{.*}} <2 x i32> @test_vset_lane_s32(i32 noundef %a, <2 
x i32> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
-// CHECK:   ret <2 x i32> [[VSET_LANE]]
-int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
-  return vset_lane_s32(a, b, 1);
-}
-
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vset_lane_p8(i8 noundef %a, <8 x 
i8> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
-// CHECK:   ret <8 x i8> [[VSET_LANE]]
-poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
-  return vset_lane_p8(a, b, 7);
-}
-
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vset_lane_p16(i16 noundef %a, <4 
x i16> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
-// CHECK:   ret <4 x i16> [[VSET_LANE]]
-poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
-  return vset_lane_p16(a, b, 3);
-}
-
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vset_lane_f32(float noundef %a, 
<2 x float> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
-// CHECK:   ret <2 x float> [[VSET_LANE]]
-float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
-  return vset_lane_f32(a, b, 1);
-}
-
-// CHECK-LABEL: define{{.*}} <4 x half> @test_vset_lane_f16(ptr noundef %a, <4 
x half> noundef %b) #0 {
-// CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
-// CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
-// CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
-// CHECK:   [[TMP0:%.*]] = load half, ptr %a, align 2
-// CHECK:   store half [[TMP0]], ptr [[__REINT_246]], align 2
-// CHECK:   store <4 x half> %b, ptr [[__REINT1_246]], align 8
-// CHECK:   [[TMP2:%.*]] = load i16, ptr [[__REINT_246]], align 2
-// CHECK:   [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT1_246]], align 8
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 
[[TMP2]], i32 3
-// CHECK:   store <4 x i16> [[VSET_LANE]], ptr [[__REINT2_246]], align 8
-// CHECK:   [[TMP8:%.*]] = load <4 x half>, ptr [[__REINT2_246]], align 8
-// CHECK:   ret <4 x half> [[TMP8]]
-float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
-  return vset_lane_f16(*a, b, 3);
-}
-
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vsetq_lane_u8(i8 noundef %a, <16 
x i8> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
-// CHECK:   ret <16 x i8> [[VSET_LANE]]
-uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
-  return vsetq_lane_u8(a, b, 15);
-}
-
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vsetq_lane_u16(i16 noundef %a, <8 
x i16> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
-// CHECK:   ret <8 x i16> [[VSET_LANE]]
-uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
-  return vsetq_lane_u16(a, b, 7);
-}
-
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_vsetq_lane_u32(i32 noundef %a, <4 
x i32> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
-// CHECK:   ret <4 x i32> [[VSET_LANE]]
-uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
-  return vsetq_lane_u32(a, b, 3);
-}
-
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vsetq_lane_s8(i8 noundef %a, <16 
x i8> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
-// CHECK:   ret <16 x i8> [[VSET_LANE]]
-int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
-  return vsetq_lane_s8(a, b, 15);
-}
-
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vsetq_lane_s16(i16 noundef %a, <8 
x i16> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
-// CHECK:   ret <8 x i16> [[VSET_LANE]]
-int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
-  return vsetq_lane_s16(a, b, 7);
-}
-
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_vsetq_lane_s32(i32 noundef %a, <4 
x i32> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
-// CHECK:   ret <4 x i32> [[VSET_LANE]]
-int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
-  return vsetq_lane_s32(a, b, 3);
-}
-
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vsetq_lane_p8(i8 noundef %a, <16 
x i8> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
-// CHECK:   ret <16 x i8> [[VSET_LANE]]
-poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
-  return vsetq_lane_p8(a, b, 15);
-}
-
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vsetq_lane_p16(i16 noundef %a, <8 
x i16> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
-// CHECK:   ret <8 x i16> [[VSET_LANE]]
-poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
-  return vsetq_lane_p16(a, b, 7);
-}
-
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vsetq_lane_f32(float noundef 
%a, <4 x float> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
-// CHECK:   ret <4 x float> [[VSET_LANE]]
-float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
-  return vsetq_lane_f32(a, b, 3);
-}
-
-// CHECK-LABEL: define{{.*}} <8 x half> @test_vsetq_lane_f16(ptr noundef %a, 
<8 x half> noundef %b) #0 {
-// CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
-// CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
-// CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
-// CHECK:   [[TMP0:%.*]] = load half, ptr %a, align 2
-// CHECK:   store half [[TMP0]], ptr [[__REINT_248]], align 2
-// CHECK:   store <8 x half> %b, ptr [[__REINT1_248]], align 16
-// CHECK:   [[TMP2:%.*]] = load i16, ptr [[__REINT_248]], align 2
-// CHECK:   [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT1_248]], align 16
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 
[[TMP2]], i32 7
-// CHECK:   store <8 x i16> [[VSET_LANE]], ptr [[__REINT2_248]], align 16
-// CHECK:   [[TMP8:%.*]] = load <8 x half>, ptr [[__REINT2_248]], align 16
-// CHECK:   ret <8 x half> [[TMP8]]
-float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
-  return vsetq_lane_f16(*a, b, 7);
-}
-
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vset_lane_s64(i64 noundef %a, <1 
x i64> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
-// CHECK:   ret <1 x i64> [[VSET_LANE]]
-int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
-  return vset_lane_s64(a, b, 0);
-}
-
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vset_lane_u64(i64 noundef %a, <1 
x i64> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
-// CHECK:   ret <1 x i64> [[VSET_LANE]]
-uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
-  return vset_lane_u64(a, b, 0);
-}
-
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vsetq_lane_s64(i64 noundef %a, <2 
x i64> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
-// CHECK:   ret <2 x i64> [[VSET_LANE]]
-int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
-  return vsetq_lane_s64(a, b, 1);
-}
-
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vsetq_lane_u64(i64 noundef %a, <2 
x i64> noundef %b) #0 {
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
-// CHECK:   ret <2 x i64> [[VSET_LANE]]
-uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
-  return vsetq_lane_u64(a, b, 1);
-}
diff --git a/clang/test/CodeGen/AArch64/neon/neon-vset.c b/clang/test/CodeGen/AArch64/neon/neon-vset.c
new file mode 100644
index 0000000000000..dac09e85df9be
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/neon/neon-vset.c
@@ -0,0 +1,259 @@
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// RUN:                   %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none           -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM
+// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM %}
+// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +bf16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-cir  -o - %s |                               FileCheck %s --check-prefixes=CIR %}
+
+//=============================================================================
+// NOTES
+//
+// This file contains tests originally located in:
+//  * clang/test/CodeGen/AArch64/neon-vget.c  (vset_lane_* / vsetq_lane_* section)
+//  * clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c  (bf16 vset variants)
+//
+// The main difference is the use of RUN lines that enable ClangIR lowering;
+// therefore only builtins currently supported by ClangIR are tested here.
+//
+// The f16 variants (vset_lane_f16 / vsetq_lane_f16) are intentionally omitted:
+// they are implemented in arm_neon.h via pointer-based bit_cast rather than
+// a direct builtin call, producing complex alloca/store/load sequences that
+// differ between the two code paths.
+//
+// The p8/p16 poly variants (vset_lane_p8, vset_lane_p16, vsetq_lane_p8,
+// vsetq_lane_p16) are intentionally omitted: the arm_neon.h macros for these
+// perform an implicit poly-vector → int-vector conversion which is rejected
+// when -flax-vector-conversions=none is in effect.
+//
+// ACLE reference:
+//  https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#set-vector-lane
+//=============================================================================
+
+#include <arm_neon.h>
+
+//===------------------------------------------------------===//
+// 64-bit vector (vset_lane_*)
+//===------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vset_lane_u8(
+// CIR-LABEL: @test_vset_lane_u8(
+uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
+// CIR: cir.vec.insert
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <8 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+// LLVM: ret <8 x i8> [[VSET_LANE]]
+  return vset_lane_u8(a, b, 7);
+}
+
+// LLVM-LABEL: @test_vset_lane_u16(
+// CIR-LABEL: @test_vset_lane_u16(
+uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
+// CIR: cir.vec.insert
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i32 
3
+// LLVM: ret <4 x i16> [[VSET_LANE]]
+  return vset_lane_u16(a, b, 3);
+}
+
+// LLVM-LABEL: @test_vset_lane_u32(
+// CIR-LABEL: @test_vset_lane_u32(
+uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
+// CIR: cir.vec.insert
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <2 x i32> %{{.*}}, i32 %{{.*}}, i32 
1
+// LLVM: ret <2 x i32> [[VSET_LANE]]
+  return vset_lane_u32(a, b, 1);
+}
+
+// LLVM-LABEL: @test_vset_lane_s8(
+// CIR-LABEL: @test_vset_lane_s8(
+int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<8 x 
!s8i>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <8 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+// LLVM: ret <8 x i8> [[VSET_LANE]]
+  return vset_lane_s8(a, b, 7);
+}
+
+// LLVM-LABEL: @test_vset_lane_s16(
+// CIR-LABEL: @test_vset_lane_s16(
+int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<4 x 
!s16i>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i32 
3
+// LLVM: ret <4 x i16> [[VSET_LANE]]
+  return vset_lane_s16(a, b, 3);
+}
+
+// LLVM-LABEL: @test_vset_lane_s32(
+// CIR-LABEL: @test_vset_lane_s32(
+int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<2 x 
!s32i>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <2 x i32> %{{.*}}, i32 %{{.*}}, i32 
1
+// LLVM: ret <2 x i32> [[VSET_LANE]]
+  return vset_lane_s32(a, b, 1);
+}
+
+// LLVM-LABEL: @test_vset_lane_f32(
+// CIR-LABEL: @test_vset_lane_f32(
+float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<2 x 
!cir.float>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <2 x float> %{{.*}}, float %{{.*}}, 
i32 1
+// LLVM: ret <2 x float> [[VSET_LANE]]
+  return vset_lane_f32(a, b, 1);
+}
+
+// LLVM-LABEL: @test_vset_lane_s64(
+// CIR-LABEL: @test_vset_lane_s64(
+int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<1 x 
!s64i>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <1 x i64> %{{.*}}, i64 %{{.*}}, i32 0
+// LLVM: ret <1 x i64> [[VSET_LANE]]
+  return vset_lane_s64(a, b, 0);
+}
+
+// LLVM-LABEL: @test_vset_lane_u64(
+// CIR-LABEL: @test_vset_lane_u64(
+uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
+// CIR: cir.vec.insert
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <1 x i64> %{{.*}}, i64 %{{.*}}, i32 0
+// LLVM: ret <1 x i64> [[VSET_LANE]]
+  return vset_lane_u64(a, b, 0);
+}
+
+// LLVM-LABEL: @test_vset_lane_f64(
+// CIR-LABEL: @test_vset_lane_f64(
+float64x1_t test_vset_lane_f64(float64_t a, float64x1_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<1 x 
!cir.double>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <1 x double> %{{.*}}, double 
%{{.*}}, i32 0
+// LLVM: ret <1 x double> [[VSET_LANE]]
+  return vset_lane_f64(a, b, 0);
+}
+
+// LLVM-LABEL: @test_vset_lane_bf16(
+// CIR-LABEL: @test_vset_lane_bf16(
+bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<4 x 
!cir.bf16>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> %{{.*}}, bfloat 
%{{.*}}, i32 1
+// LLVM: ret <4 x bfloat> [[VSET_LANE]]
+  return vset_lane_bf16(a, b, 1);
+}
+
+//===------------------------------------------------------===//
+// 128-bit vector (vsetq_lane_*)
+//===------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vsetq_lane_u8(
+// CIR-LABEL: @test_vsetq_lane_u8(
+uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
+// CIR: cir.vec.insert
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 
15
+// LLVM: ret <16 x i8> [[VSET_LANE]]
+  return vsetq_lane_u8(a, b, 15);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_u16(
+// CIR-LABEL: @test_vsetq_lane_u16(
+uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
+// CIR: cir.vec.insert
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 
7
+// LLVM: ret <8 x i16> [[VSET_LANE]]
+  return vsetq_lane_u16(a, b, 7);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_u32(
+// CIR-LABEL: @test_vsetq_lane_u32(
+uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
+// CIR: cir.vec.insert
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 
3
+// LLVM: ret <4 x i32> [[VSET_LANE]]
+  return vsetq_lane_u32(a, b, 3);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_s8(
+// CIR-LABEL: @test_vsetq_lane_s8(
+int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<16 x 
!s8i>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 
15
+// LLVM: ret <16 x i8> [[VSET_LANE]]
+  return vsetq_lane_s8(a, b, 15);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_s16(
+// CIR-LABEL: @test_vsetq_lane_s16(
+int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<8 x 
!s16i>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 
7
+// LLVM: ret <8 x i16> [[VSET_LANE]]
+  return vsetq_lane_s16(a, b, 7);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_s32(
+// CIR-LABEL: @test_vsetq_lane_s32(
+int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<4 x 
!s32i>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 
3
+// LLVM: ret <4 x i32> [[VSET_LANE]]
+  return vsetq_lane_s32(a, b, 3);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_f32(
+// CIR-LABEL: @test_vsetq_lane_f32(
+float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<4 x 
!cir.float>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <4 x float> %{{.*}}, float %{{.*}}, 
i32 3
+// LLVM: ret <4 x float> [[VSET_LANE]]
+  return vsetq_lane_f32(a, b, 3);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_s64(
+// CIR-LABEL: @test_vsetq_lane_s64(
+int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<2 x 
!s64i>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 
1
+// LLVM: ret <2 x i64> [[VSET_LANE]]
+  return vsetq_lane_s64(a, b, 1);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_u64(
+// CIR-LABEL: @test_vsetq_lane_u64(
+uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
+// CIR: cir.vec.insert
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 
1
+// LLVM: ret <2 x i64> [[VSET_LANE]]
+  return vsetq_lane_u64(a, b, 1);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_f64(
+// CIR-LABEL: @test_vsetq_lane_f64(
+float64x2_t test_vsetq_lane_f64(float64_t a, float64x2_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<2 x 
!cir.double>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <2 x double> %{{.*}}, double 
%{{.*}}, i32 1
+// LLVM: ret <2 x double> [[VSET_LANE]]
+  return vsetq_lane_f64(a, b, 1);
+}
+
+// LLVM-LABEL: @test_vsetq_lane_bf16(
+// CIR-LABEL: @test_vsetq_lane_bf16(
+bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t b) {
+// CIR: cir.vec.insert %{{.*}}, %{{.*}}[%{{.*}} : {{.*}}] : !cir.vector<8 x 
!cir.bf16>
+
+// LLVM: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> %{{.*}}, bfloat 
%{{.*}}, i32 7
+// LLVM: ret <8 x bfloat> [[VSET_LANE]]
+  return vsetq_lane_bf16(a, b, 7);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][AArch64] Upstream vset_lane_*/vsetq_lane_* NEON builtins (PR #186623)

Reply via email to

[clang] [CIR][AArch64] Upstream vset_lane_*/vsetq_lane_* NEON builtins (PR #186623)