[clang] e11a31f - [CIR][AArch64] Lower FP16 vduph lane intrinsics (#186955)

via cfe-commits Mon, 06 Apr 2026 11:12:46 -0700

Author: neonetizen
Date: 2026-04-06T19:12:34+01:00
New Revision: e11a31f4c7f61a3abb0f9101f3269e5622195788


URL: 
https://github.com/llvm/llvm-project/commit/e11a31f4c7f61a3abb0f9101f3269e5622195788
DIFF: 
https://github.com/llvm/llvm-project/commit/e11a31f4c7f61a3abb0f9101f3269e5622195788.diff

LOG: [CIR][AArch64] Lower FP16 vduph lane intrinsics (#186955)

From #185382

Lower `vduph_lane_f16` and `vduph_laneq_f16` to `cir::VecExtractOp`

Tests moved from `v8.2a-neon-intrinsics-generic.c` to a new CIR-enabled
test file.

I tried to follow the notes made in #185852 (BF16).

Added: 
    clang/test/CodeGen/AArch64/neon/f16-getset.c

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
    clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 3d1e11ab87354..3a9e7e2650500 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -139,10 +139,9 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, 
NeonTypeFlags typeFlags,
     return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad));
   case NeonTypeFlags::Float16:
     if (hasLegalHalfType)
-      cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
-    else
-      cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
-    [[fallthrough]];
+      return cir::VectorType::get(cgf->getCIRGenModule().fP16Ty,
+                                  v1Ty ? 1 : (4 << isQuad));
+    return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad));
   case NeonTypeFlags::Int32:
     return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
                                                        : cgf->sInt32Ty,
@@ -2219,7 +2218,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vduph_lane_bf16: {
     return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]);
   }
-  case NEON::BI__builtin_neon_vduph_lane_f16:
+  case NEON::BI__builtin_neon_vduph_lane_f16: {
+    return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]);
+  }
   case NEON::BI__builtin_neon_vgetq_lane_bf16:
     cgm.errorNYI(expr->getSourceRange(),
                  std::string("unimplemented AArch64 builtin call: ") +
@@ -2228,7 +2229,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vduph_laneq_bf16: {
     return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]);
   }
-  case NEON::BI__builtin_neon_vduph_laneq_f16:
+  case NEON::BI__builtin_neon_vduph_laneq_f16: {
+    return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]);
+  }
   case NEON::BI__builtin_neon_vcvt_bf16_f32:
   case NEON::BI__builtin_neon_vcvtq_low_bf16_f32:
   case NEON::BI__builtin_neon_vcvtq_high_bf16_f32:

diff  --git a/clang/test/CodeGen/AArch64/neon/f16-getset.c 
b/clang/test/CodeGen/AArch64/neon/f16-getset.c
new file mode 100644
index 0000000000000..7e4d56c4a0a40
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/neon/f16-getset.c
@@ -0,0 +1,101 @@
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// RUN:                   %clang_cc1 -triple arm64-none-linux-gnu 
-target-feature +neon -target-feature +fullfp16 -disable-O0-optnone 
-flax-vector-conversions=none           -emit-llvm -o - %s | opt -S 
-passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM
+// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu 
-target-feature +neon -target-feature +fullfp16 -disable-O0-optnone 
-flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S 
-passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM %}
+// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu 
-target-feature +neon -target-feature +fullfp16 -disable-O0-optnone 
-flax-vector-conversions=none -fclangir -emit-cir  -o - %s |                    
           FileCheck %s --check-prefixes=ALL,CIR %}
+
+#include <arm_neon.h>
+
+//===------------------------------------------------------===//
+// 2.7.2.4 Set all lanes to the same value
+//===------------------------------------------------------===//
+
+// ALL-LABEL: @test_vdup_n_f16(
+float16x4_t test_vdup_n_f16(float16_t a) {
+  // CIR: cir.vec.create(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !cir.f16, 
!cir.f16, !cir.f16, !cir.f16) : !cir.vector<4 x !cir.f16>
+
+  // LLVM-SAME: half noundef [[A:%.*]])
+  // LLVM:      [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], 
i{{32|64}} 0
+  // LLVM-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half 
[[A]], i{{32|64}} 1
+  // LLVM-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half 
[[A]], i{{32|64}} 2
+  // LLVM-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half 
[[A]], i{{32|64}} 3
+  // LLVM:      ret <4 x half> [[VECINIT3]]
+  return vdup_n_f16(a);
+}
+
+// ALL-LABEL: @test_vdupq_n_f16(
+float16x8_t test_vdupq_n_f16(float16_t a) {
+  // CIR: cir.vec.create(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, 
%{{.*}}, %{{.*}} : !cir.f16, !cir.f16, !cir.f16, !cir.f16, !cir.f16, !cir.f16, 
!cir.f16, !cir.f16) : !cir.vector<8 x !cir.f16>
+
+  // LLVM-SAME: half noundef [[A:%.*]])
+  // LLVM:      [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], 
i{{32|64}} 0
+  // LLVM-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half 
[[A]], i{{32|64}} 1
+  // LLVM-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half 
[[A]], i{{32|64}} 2
+  // LLVM-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half 
[[A]], i{{32|64}} 3
+  // LLVM-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half 
[[A]], i{{32|64}} 4
+  // LLVM-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half 
[[A]], i{{32|64}} 5
+  // LLVM-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half 
[[A]], i{{32|64}} 6
+  // LLVM-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half 
[[A]], i{{32|64}} 7
+  // LLVM:      ret <8 x half> [[VECINIT7]]
+  return vdupq_n_f16(a);
+}
+
+// ALL-LABEL: @test_vdup_lane_f16(
+float16x4_t test_vdup_lane_f16(float16x4_t a) {
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.f16>) 
[#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : 
!s32i] : !cir.vector<4 x !cir.f16>
+
+  // LLVM-SAME: <4 x half> noundef [[A:%.*]])
+  // LLVM:      [[LANE:%.*]] = shufflevector <4 x half> {{.*}}, <4 x half> 
{{.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM:      ret <4 x half> [[LANE]]
+  return vdup_lane_f16(a, 3);
+}
+
+// ALL-LABEL: @test_vdupq_lane_f16(
+float16x8_t test_vdupq_lane_f16(float16x4_t a) {
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.f16>) 
[#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : 
!s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<8 x !cir.f16>
+
+  // LLVM-SAME: <4 x half> noundef [[A:%.*]])
+  // LLVM:      [[LANE:%.*]] = shufflevector <4 x half> {{.*}}, <4 x half> 
{{.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  // LLVM:      ret <8 x half> [[LANE]]
+  return vdupq_lane_f16(a, 3);
+}
+
+// ALL-LABEL: @test_vdup_laneq_f16(
+float16x4_t test_vdup_laneq_f16(float16x8_t a) {
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !cir.f16>) 
[#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : 
!s32i] : !cir.vector<4 x !cir.f16>
+
+  // LLVM-SAME: <8 x half> noundef [[A:%.*]])
+  // LLVM:      [[LANE:%.*]] = shufflevector <8 x half> {{.*}}, <8 x half> 
{{.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // LLVM:      ret <4 x half> [[LANE]]
+  return vdup_laneq_f16(a, 1);
+}
+
+// ALL-LABEL: @test_vdupq_laneq_f16(
+float16x8_t test_vdupq_laneq_f16(float16x8_t a) {
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !cir.f16>) 
[#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : 
!s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, 
#cir.int<7> : !s32i] : !cir.vector<8 x !cir.f16>
+
+  // LLVM-SAME: <8 x half> noundef [[A:%.*]])
+  // LLVM:      [[LANE:%.*]] = shufflevector <8 x half> {{.*}}, <8 x half> 
{{.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM:      ret <8 x half> [[LANE]]
+  return vdupq_laneq_f16(a, 7);
+}
+
+// ALL-LABEL: @test_vduph_lane_f16(
+float16_t test_vduph_lane_f16(float16x4_t vec) {
+  // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !s32i] : !cir.vector<4 x !cir.f16>
+
+  // LLVM-SAME: <4 x half> {{.*}}[[VEC:%.*]])
+  // LLVM:      [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3
+  // LLVM:      ret half [[VGET_LANE]]
+  return vduph_lane_f16(vec, 3);
+}
+
+// ALL-LABEL: @test_vduph_laneq_f16(
+float16_t test_vduph_laneq_f16(float16x8_t vec) {
+  // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !s32i] : !cir.vector<8 x !cir.f16>
+
+  // LLVM-SAME: <8 x half> {{.*}}[[VEC:%.*]])
+  // LLVM:      [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7
+  // LLVM:      ret half [[VGETQ_LANE]]
+  return vduph_laneq_f16(vec, 7);
+}

diff  --git a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c 
b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c
index 8c719178d7241..6da29d95075ec 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c
@@ -230,88 +230,6 @@ float16x8_t test_vmovq_n_f16(float16_t a) {
   return vmovq_n_f16(a);
 }
 
-// CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16
-// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half 
[[A]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], 
half [[A]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], 
half [[A]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], 
half [[A]], i32 3
-// CHECK-NEXT:    ret <4 x half> [[VECINIT3]]
-//
-float16x4_t test_vdup_n_f16(float16_t a) {
-  return vdup_n_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16
-// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half 
[[A]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], 
half [[A]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], 
half [[A]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], 
half [[A]], i32 3
-// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], 
half [[A]], i32 4
-// CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], 
half [[A]], i32 5
-// CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], 
half [[A]], i32 6
-// CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], 
half [[A]], i32 7
-// CHECK-NEXT:    ret <8 x half> [[VECINIT7]]
-//
-float16x8_t test_vdupq_n_f16(float16_t a) {
-  return vdupq_n_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> 
[[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    ret <4 x half> [[LANE]]
-//
-float16x4_t test_vdup_lane_f16(float16x4_t a) {
-  return vdup_lane_f16(a, 3);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> 
[[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    ret <8 x half> [[LANE]]
-//
-float16x8_t test_vdupq_lane_f16(float16x4_t a) {
-  return vdupq_lane_f16(a, 3);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> 
[[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK-NEXT:    ret <4 x half> [[LANE]]
-//
-float16x4_t test_vdup_laneq_f16(float16x8_t a) {
-  return vdup_laneq_f16(a, 1);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> 
[[TMP2]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT:    ret <8 x half> [[LANE]]
-//
-float16x8_t test_vdupq_laneq_f16(float16x8_t a) {
-  return vdupq_laneq_f16(a, 7);
-}
-
 // CHECK-LABEL: define {{[^@]+}}@test_vext_f16
 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  entry:
@@ -483,23 +401,3 @@ float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) {
 float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) {
   return vtrn2q_f16(a, b);
 }
-
-// CHECK-LABEL: define {{[^@]+}}@test_vduph_laneq_f16
-// CHECK-SAME: (<8 x half> noundef [[VEC:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7
-// CHECK-NEXT:    ret half [[VGETQ_LANE]]
-//
-float16_t test_vduph_laneq_f16(float16x8_t vec) {
-  return vduph_laneq_f16(vec, 7);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vduph_lane_f16
-// CHECK-SAME: (<4 x half> noundef [[VEC:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3
-// CHECK-NEXT:    ret half [[VGET_LANE]]
-//
-float16_t test_vduph_lane_f16(float16x4_t vec) {
-  return vduph_lane_f16(vec, 3);
-}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] e11a31f - [CIR][AArch64] Lower FP16 vduph lane intrinsics (#186955)

Reply via email to