Author: neonetizen Date: 2026-04-06T19:12:34+01:00 New Revision: e11a31f4c7f61a3abb0f9101f3269e5622195788
URL: https://github.com/llvm/llvm-project/commit/e11a31f4c7f61a3abb0f9101f3269e5622195788 DIFF: https://github.com/llvm/llvm-project/commit/e11a31f4c7f61a3abb0f9101f3269e5622195788.diff LOG: [CIR][AArch64] Lower FP16 vduph lane intrinsics (#186955) From #185382 Lower `vduph_lane_f16` and `vduph_laneq_f16` to `cir::VecExtractOp` Tests moved from `v8.2a-neon-intrinsics-generic.c` to a new CIR-enabled test file. I tried to follow the notes made in #185852 (BF16) Added: clang/test/CodeGen/AArch64/neon/f16-getset.c Modified: clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 3d1e11ab87354..3a9e7e2650500 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -139,10 +139,9 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags, return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad)); case NeonTypeFlags::Float16: if (hasLegalHalfType) - cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16")); - else - cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16")); - [[fallthrough]]; + return cir::VectorType::get(cgf->getCIRGenModule().fP16Ty, + v1Ty ? 1 : (4 << isQuad)); + return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad)); case NeonTypeFlags::Int32: return cir::VectorType::get(typeFlags.isUnsigned() ? 
cgf->uInt32Ty : cgf->sInt32Ty, @@ -2219,7 +2218,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vduph_lane_bf16: { return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]); } - case NEON::BI__builtin_neon_vduph_lane_f16: + case NEON::BI__builtin_neon_vduph_lane_f16: { + return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]); + } case NEON::BI__builtin_neon_vgetq_lane_bf16: cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented AArch64 builtin call: ") + @@ -2228,7 +2229,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vduph_laneq_bf16: { return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]); } - case NEON::BI__builtin_neon_vduph_laneq_f16: + case NEON::BI__builtin_neon_vduph_laneq_f16: { + return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]); + } case NEON::BI__builtin_neon_vcvt_bf16_f32: case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: diff --git a/clang/test/CodeGen/AArch64/neon/f16-getset.c b/clang/test/CodeGen/AArch64/neon/f16-getset.c new file mode 100644 index 0000000000000..7e4d56c4a0a40 --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/f16-getset.c @@ -0,0 +1,101 @@ +// REQUIRES: aarch64-registered-target || arm-registered-target + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -disable-O0-optnone -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM %} +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature 
+fullfp16 -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR %} + +#include <arm_neon.h> + +//===------------------------------------------------------===// +// 2.7.2.4 Set all lanes to the same value +//===------------------------------------------------------===// + +// ALL-LABEL: @test_vdup_n_f16( +float16x4_t test_vdup_n_f16(float16_t a) { + // CIR: cir.vec.create(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !cir.f16, !cir.f16, !cir.f16, !cir.f16) : !cir.vector<4 x !cir.f16> + + // LLVM-SAME: half noundef [[A:%.*]]) + // LLVM: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i{{32|64}} 0 + // LLVM-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i{{32|64}} 1 + // LLVM-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i{{32|64}} 2 + // LLVM-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i{{32|64}} 3 + // LLVM: ret <4 x half> [[VECINIT3]] + return vdup_n_f16(a); +} + +// ALL-LABEL: @test_vdupq_n_f16( +float16x8_t test_vdupq_n_f16(float16_t a) { + // CIR: cir.vec.create(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !cir.f16, !cir.f16, !cir.f16, !cir.f16, !cir.f16, !cir.f16, !cir.f16, !cir.f16) : !cir.vector<8 x !cir.f16> + + // LLVM-SAME: half noundef [[A:%.*]]) + // LLVM: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i{{32|64}} 0 + // LLVM-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i{{32|64}} 1 + // LLVM-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i{{32|64}} 2 + // LLVM-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i{{32|64}} 3 + // LLVM-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i{{32|64}} 4 + // LLVM-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i{{32|64}} 5 + // LLVM-NEXT: [[VECINIT6:%.*]] = insertelement <8 x 
half> [[VECINIT5]], half [[A]], i{{32|64}} 6 + // LLVM-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i{{32|64}} 7 + // LLVM: ret <8 x half> [[VECINIT7]] + return vdupq_n_f16(a); +} + +// ALL-LABEL: @test_vdup_lane_f16( +float16x4_t test_vdup_lane_f16(float16x4_t a) { + // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.f16>) [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.f16> + + // LLVM-SAME: <4 x half> noundef [[A:%.*]]) + // LLVM: [[LANE:%.*]] = shufflevector <4 x half> {{.*}}, <4 x half> {{.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + // LLVM: ret <4 x half> [[LANE]] + return vdup_lane_f16(a, 3); +} + +// ALL-LABEL: @test_vdupq_lane_f16( +float16x8_t test_vdupq_lane_f16(float16x4_t a) { + // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.f16>) [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<8 x !cir.f16> + + // LLVM-SAME: <4 x half> noundef [[A:%.*]]) + // LLVM: [[LANE:%.*]] = shufflevector <4 x half> {{.*}}, <4 x half> {{.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + // LLVM: ret <8 x half> [[LANE]] + return vdupq_lane_f16(a, 3); +} + +// ALL-LABEL: @test_vdup_laneq_f16( +float16x4_t test_vdup_laneq_f16(float16x8_t a) { + // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !cir.f16>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<4 x !cir.f16> + + // LLVM-SAME: <8 x half> noundef [[A:%.*]]) + // LLVM: [[LANE:%.*]] = shufflevector <8 x half> {{.*}}, <8 x half> {{.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + // LLVM: ret <4 x half> [[LANE]] + return vdup_laneq_f16(a, 1); +} + +// ALL-LABEL: @test_vdupq_laneq_f16( +float16x8_t test_vdupq_laneq_f16(float16x8_t a) { + // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x 
!cir.f16>) [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.f16> + + // LLVM-SAME: <8 x half> noundef [[A:%.*]]) + // LLVM: [[LANE:%.*]] = shufflevector <8 x half> {{.*}}, <8 x half> {{.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> + // LLVM: ret <8 x half> [[LANE]] + return vdupq_laneq_f16(a, 7); +} + +// ALL-LABEL: @test_vduph_lane_f16( +float16_t test_vduph_lane_f16(float16x4_t vec) { + // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !s32i] : !cir.vector<4 x !cir.f16> + + // LLVM-SAME: <4 x half> {{.*}}[[VEC:%.*]]) + // LLVM: [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3 + // LLVM: ret half [[VGET_LANE]] + return vduph_lane_f16(vec, 3); +} + +// ALL-LABEL: @test_vduph_laneq_f16( +float16_t test_vduph_laneq_f16(float16x8_t vec) { + // CIR: cir.vec.extract %{{.*}}[%{{.*}} : !s32i] : !cir.vector<8 x !cir.f16> + + // LLVM-SAME: <8 x half> {{.*}}[[VEC:%.*]]) + // LLVM: [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7 + // LLVM: ret half [[VGETQ_LANE]] + return vduph_laneq_f16(vec, 7); +} diff --git a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c index 8c719178d7241..6da29d95075ec 100644 --- a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c +++ b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c @@ -230,88 +230,6 @@ float16x8_t test_vmovq_n_f16(float16_t a) { return vmovq_n_f16(a); } -// CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: 
[[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: ret <4 x half> [[VECINIT3]] -// -float16x4_t test_vdup_n_f16(float16_t a) { - return vdup_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 -// CHECK-NEXT: ret <8 x half> [[VECINIT7]] -// -float16x8_t test_vdupq_n_f16(float16_t a) { - return vdupq_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> -// CHECK-NEXT: ret <4 x half> [[LANE]] -// -float16x4_t test_vdup_lane_f16(float16x4_t a) { - return vdup_lane_f16(a, 3); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16> -// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -// CHECK-NEXT: ret <8 x half> [[LANE]] -// -float16x8_t test_vdupq_lane_f16(float16x4_t a) { - return vdupq_lane_f16(a, 3); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> -// CHECK-NEXT: ret <4 x half> [[LANE]] -// -float16x4_t test_vdup_laneq_f16(float16x8_t a) { - return vdup_laneq_f16(a, 1); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -// CHECK-NEXT: ret <8 x half> [[LANE]] -// -float16x8_t test_vdupq_laneq_f16(float16x8_t a) { - return vdupq_laneq_f16(a, 7); -} - // CHECK-LABEL: define {{[^@]+}}@test_vext_f16 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -483,23 +401,3 @@ float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) { float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) { return vtrn2q_f16(a, b); } - -// CHECK-LABEL: 
define {{[^@]+}}@test_vduph_laneq_f16 -// CHECK-SAME: (<8 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7 -// CHECK-NEXT: ret half [[VGETQ_LANE]] -// -float16_t test_vduph_laneq_f16(float16x8_t vec) { - return vduph_laneq_f16(vec, 7); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vduph_lane_f16 -// CHECK-SAME: (<4 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3 -// CHECK-NEXT: ret half [[VGET_LANE]] -// -float16_t test_vduph_lane_f16(float16x4_t vec) { - return vduph_lane_f16(vec, 3); -} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
