https://github.com/banach-space created https://github.com/llvm/llvm-project/pull/183595
Add CIR lowering for the following AdvSIMD (NEON) intrinsic families: * vabd_* – Absolute difference https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference * vaba_* – Absolute difference and accumulate https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference-and-accumulate Tests for these intrinsics were split out from: test/CodeGen/AArch64/neon-intrinsics.c and moved to: test/CodeGen/AArch64/neon/intrinsics.c The following helper hooks were adapted from the ClangIR project: * getNeonType, emitNeonCall, emitNeonCallToOp. NOTE: Quad-word variants (e.g. vabaq_*) are not included in this change and will be added in a follow-up patch. Credit to the ClangIR contributors for the original implementation. From a17ab73376fe34cc863a4255b1633919c93d79c3 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski <[email protected]> Date: Thu, 26 Feb 2026 18:44:15 +0000 Subject: [PATCH] [CIR][AArch64] Add lowering for vaba_* and vabd_* builtins (1/N) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CIR lowering for the following AdvSIMD (NEON) intrinsic families: * vabd_* – Absolute difference https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference * vaba_* – Absolute difference and accumulate https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference-and-accumulate Tests for these intrinsics were split out from: test/CodeGen/AArch64/neon-intrinsics.c and moved to: test/CodeGen/AArch64/neon/intrinsics.c The following helper hooks were adapted from the ClangIR project: * getNeonType, emitNeonCall, emitNeonCallToOp. NOTE: Quad-word variants (e.g. vabaq_*) are not included in this change and will be added in a follow-up patch. Credit to the ClangIR contributors for the original implementation. 
--- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 3 + .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 136 +++++++++++ clang/test/CodeGen/AArch64/neon-intrinsics.c | 174 -------------- clang/test/CodeGen/AArch64/neon/intrinsics.c | 213 ++++++++++++++++++ 4 files changed, 352 insertions(+), 174 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index efae3d9d894ed..fb96050964fcc 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -349,6 +349,9 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { mlir::IntegerAttr align = {}, cir::SyncScopeKindAttr scope = {}, cir::MemOrderAttr order = {}) { + if (mlir::cast<cir::PointerType>(dst.getType()).getPointee() != + val.getType()) + dst = createPtrBitcast(dst, val.getType()); return cir::StoreOp::create(*this, loc, val, dst, isVolatile, align, scope, order); } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 5129aa75f8f8d..af36be32b2cdd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -290,6 +290,118 @@ static bool hasExtraNeonArgument(unsigned builtinID) { return mask != 0; } +// TODO: Remove `loc` from the list of arguments once all NYIs are gone. +static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags, + mlir::Location loc, + bool hasLegalHalfType = true, + bool v1Ty = false, + bool allowBFloatArgsAndRet = true) { + int isQuad = typeFlags.isQuad(); + switch (typeFlags.getEltType()) { + case NeonTypeFlags::Int8: + case NeonTypeFlags::Poly8: + return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty + : cgf->sInt8Ty, + v1Ty ? 
1 : (8 << isQuad)); + case NeonTypeFlags::MFloat8: + cgf->getCIRGenModule().errorNYI( + loc, std::string("unimplemented NEON type: MFloat8")); + [[fallthrough]]; + case NeonTypeFlags::Int16: + case NeonTypeFlags::Poly16: + return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty + : cgf->sInt16Ty, + v1Ty ? 1 : (4 << isQuad)); + case NeonTypeFlags::BFloat16: + if (allowBFloatArgsAndRet) + cgf->getCIRGenModule().errorNYI( + loc, std::string("unimplemented NEON type: BFloat16")); + else + cgf->getCIRGenModule().errorNYI( + loc, std::string("unimplemented NEON type: BFloat16")); + case NeonTypeFlags::Float16: + if (hasLegalHalfType) + cgf->getCIRGenModule().errorNYI( + loc, std::string("unimplemented NEON type: Float16")); + else + cgf->getCIRGenModule().errorNYI( + loc, std::string("unimplemented NEON type: Float16")); + [[fallthrough]]; + case NeonTypeFlags::Int32: + return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty + : cgf->sInt32Ty, + v1Ty ? 1 : (2 << isQuad)); + case NeonTypeFlags::Int64: + case NeonTypeFlags::Poly64: + return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty + : cgf->sInt64Ty, + v1Ty ? 1 : (1 << isQuad)); + case NeonTypeFlags::Poly128: + // FIXME: i128 and f128 aren't fully supported in Clang and LLVM; + // a lot of the i128 and f128 API is missing. + // So we use v16i8 to represent poly128 and get pattern matched. + cgf->getCIRGenModule().errorNYI( + loc, std::string("unimplemented NEON type: Poly128")); + [[fallthrough]]; + case NeonTypeFlags::Float32: + return cir::VectorType::get(cgf->getCIRGenModule().floatTy, + v1Ty ? 1 : (2 << isQuad)); + case NeonTypeFlags::Float64: + return cir::VectorType::get(cgf->getCIRGenModule().doubleTy, + v1Ty ? 
1 : (1 << isQuad)); + } + llvm_unreachable("Unknown vector element type!"); +} + +template <typename Operation> +static mlir::Value emitNeonCallToOp( + CIRGenBuilderTy &builder, llvm::SmallVector<mlir::Type> argTypes, + llvm::SmallVectorImpl<mlir::Value> &args, + std::optional<llvm::StringRef> intrinsicName, mlir::Type funcResTy, + mlir::Location loc, bool isConstrainedFPIntrinsic = false, + unsigned shift = 0, bool rightshift = false) { + // TODO: Consider removing the following unreachable when we have + // emitConstrainedFPCall feature implemented + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + if (isConstrainedFPIntrinsic) + llvm_unreachable("isConstrainedFPIntrinsic NYI"); + + for (unsigned j = 0; j < argTypes.size(); ++j) { + if (isConstrainedFPIntrinsic) { + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + } + if (shift > 0 && shift == j) { + llvm_unreachable("shift NYI"); + } else { + args[j] = builder.createBitcast(args[j], argTypes[j]); + } + } + if (isConstrainedFPIntrinsic) { + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + return nullptr; + } + if constexpr (std::is_same_v<Operation, cir::LLVMIntrinsicCallOp>) { + return Operation::create(builder, loc, + builder.getStringAttr(intrinsicName.value()), + funcResTy, args) + .getResult(); + } else { + return Operation::create(builder, loc, funcResTy, args).getResult(); + } +} + +static mlir::Value emitNeonCall(CIRGenBuilderTy &builder, + llvm::SmallVector<mlir::Type> argTypes, + llvm::SmallVectorImpl<mlir::Value> &args, + llvm::StringRef intrinsicName, + mlir::Type funcResTy, mlir::Location loc, + bool isConstrainedFPIntrinsic = false, + unsigned shift = 0, bool rightshift = false) { + return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>( + builder, std::move(argTypes), args, intrinsicName, funcResTy, loc, + isConstrainedFPIntrinsic, shift, rightshift); +} + std::optional<mlir::Value> CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, const CallExpr *expr) { @@ 
-1454,6 +1566,16 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, assert(!cir::MissingFeatures::aarch64TblBuiltinExpr()); + const Expr *arg = expr->getArg(expr->getNumArgs() - 1); + NeonTypeFlags type(0); + // A trailing constant integer is used for discriminating overloaded builtin + // calls. Use it to determine the type of this overloaded NEON intrinsic. + if (std::optional<llvm::APSInt> result = + arg->getIntegerConstantExpr(getContext())) + type = NeonTypeFlags(result->getZExtValue()); + + bool usgn = type.isUnsigned(); + mlir::Location loc = getLoc(expr->getExprLoc()); // Handle non-overloaded intrinsics first. @@ -1678,6 +1800,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, return mlir::Value{}; } + cir::VectorType ty = getNeonType(this, type, loc); + if (!ty) + return nullptr; + + llvm::StringRef intrName; + switch (builtinID) { default: return std::nullopt; @@ -1700,7 +1828,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vmin_v: case NEON::BI__builtin_neon_vminq_v: case NEON::BI__builtin_neon_vminh_f16: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return mlir::Value{}; case NEON::BI__builtin_neon_vabd_v: + intrName = usgn ? 
"aarch64.neon.uabd" : "aarch64.neon.sabd"; + if (cir::isFPOrVectorOfFPType(ty)) + intrName = "aarch64.neon.fabd"; + return emitNeonCall(builder, {ty, ty}, ops, intrName, ty, loc); case NEON::BI__builtin_neon_vabdq_v: case NEON::BI__builtin_neon_vpadal_v: case NEON::BI__builtin_neon_vpadalq_v: diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 42799d27bba89..909d00630b069 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -1038,88 +1038,6 @@ float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { return vdiv_f32(v1, v2); } -// CHECK-LABEL: define dso_local <8 x i8> @test_vaba_s8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]]) -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]] -// CHECK-NEXT: ret <8 x i8> [[ADD_I]] -// -int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { - return vaba_s8(v1, v2, v3); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vaba_s16( -// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8> -// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]] -// CHECK-NEXT: ret <4 x i16> [[ADD_I]] -// -int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { - return vaba_s16(v1, v2, v3); -} 
- -// CHECK-LABEL: define dso_local <2 x i32> @test_vaba_s32( -// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8> -// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]] -// CHECK-NEXT: ret <2 x i32> [[ADD_I]] -// -int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { - return vaba_s32(v1, v2, v3); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vaba_u8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]]) -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]] -// CHECK-NEXT: ret <8 x i8> [[ADD_I]] -// -uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { - return vaba_u8(v1, v2, v3); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vaba_u16( -// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8> -// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) -// CHECK-NEXT: [[ADD_I:%.*]] 
= add <4 x i16> [[V1]], [[VABD2_I]] -// CHECK-NEXT: ret <4 x i16> [[ADD_I]] -// -uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { - return vaba_u16(v1, v2, v3); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vaba_u32( -// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8> -// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]] -// CHECK-NEXT: ret <2 x i32> [[ADD_I]] -// -uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { - return vaba_u32(v1, v2, v3); -} - // CHECK-LABEL: define dso_local <16 x i8> @test_vabaq_s8( // CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -1202,98 +1120,6 @@ uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { return vabaq_u32(v1, v2, v3); } -// CHECK-LABEL: define dso_local <8 x i8> @test_vabd_s8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]]) -// CHECK-NEXT: ret <8 x i8> [[VABD_I]] -// -int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { - return vabd_s8(v1, v2); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vabd_s16( -// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> 
[[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) -// CHECK-NEXT: ret <4 x i16> [[VABD2_I]] -// -int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { - return vabd_s16(v1, v2); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vabd_s32( -// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) -// CHECK-NEXT: ret <2 x i32> [[VABD2_I]] -// -int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { - return vabd_s32(v1, v2); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vabd_u8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]]) -// CHECK-NEXT: ret <8 x i8> [[VABD_I]] -// -uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { - return vabd_u8(v1, v2); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vabd_u16( -// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 
x i16> -// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) -// CHECK-NEXT: ret <4 x i16> [[VABD2_I]] -// -uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { - return vabd_u16(v1, v2); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vabd_u32( -// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) -// CHECK-NEXT: ret <2 x i32> [[VABD2_I]] -// -uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { - return vabd_u32(v1, v2); -} - -// CHECK-LABEL: define dso_local <2 x float> @test_vabd_f32( -// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8> -// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float> -// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) -// CHECK-NEXT: ret <2 x float> [[VABD2_I]] -// -float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { - return vabd_f32(v1, v2); -} - // CHECK-LABEL: define dso_local <16 x i8> 
@test_vabdq_s8( // CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c index 039a08c23852e..227b23f532fe1 100644 --- a/clang/test/CodeGen/AArch64/neon/intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c @@ -42,3 +42,216 @@ int64_t test_vnegd_s64(int64_t a) { // LLVM-NEXT: ret i64 [[VNEGD_I]] return (int64_t)vnegd_s64(a); } + +//===------------------------------------------------------===// +// 2.1.1.6.1. Absolute difference +//===------------------------------------------------------===// +// LLVM-LABEL: @test_vabd_s8( +// CIR-LABEL: @vabd_s8( +int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s8i>, !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i> + +// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) +// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]]) +// LLVM-NEXT: ret <8 x i8> [[VABD_I]] + return vabd_s8(v1, v2); +} + +// LLVM-LABEL: @test_vabd_s16( +// CIR-LABEL: @vabd_s16( +int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { +// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> +// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]] + +// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> +// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 
x i16> [[VABD1_I]]) +// LLVM-NEXT: ret <4 x i16> [[VABD2_I]] + return vabd_s16(v1, v2); +} + +// LLVM-LABEL: @test_vabd_s32( +// CIR-LABEL: @vabd_s32( +int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { +// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> +// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> +// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]] + +// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> +// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) +// LLVM-NEXT: ret <2 x i32> [[VABD2_I]] + return vabd_s32(v1, v2); +} + +// LLVM-LABEL: @test_vabd_u8( +// CIR-LABEL: @vabd_u8( +uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !u8i>, !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i> + +// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) +// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]]) +// LLVM-NEXT: ret <8 x i8> [[VABD_I]] + return vabd_u8(v1, v2); +} + +// LLVM-LABEL: @test_vabd_u16( +// CIR-LABEL: @vabd_u16( +uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { +// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i> +// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i> +// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]] + +// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) +// LLVM: [[TMP0:%.*]] = 
bitcast <4 x i16> [[V1]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> +// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) +// LLVM-NEXT: ret <4 x i16> [[VABD2_I]] + return vabd_u16(v1, v2); +} + +// LLVM-LABEL: @test_vabd_u32( +// CIR-LABEL: @vabd_u32( +uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { +// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i> +// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i> +// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]] + +// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> +// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) +// LLVM-NEXT: ret <2 x i32> [[VABD2_I]] + return vabd_u32(v1, v2); +} + +// LLVM-LABEL: @test_vabd_f32( +// CIR-LABEL: @vabd_f32( +float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { +// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float> +// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float> +// CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]] + +// LLVM-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32> +// LLVM-NEXT: 
[[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> +// LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8> +// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> +// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float> +// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) +// LLVM-NEXT: ret <2 x float> [[VABD2_I]] + return vabd_f32(v1, v2); +} + +//===------------------------------------------------------===// +// 2.1.1.6.3. Absolute difference and accumulate +// +// The following builtins expand to a call to vabd_{} builtins, +// which is reflected in the CIR output. +//===------------------------------------------------------===// + +// LLVM-LABEL: @test_vaba_u8( +// CIR-LABEL: @vaba_u8( +uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { +// CIR: [[ABD:%.*]] = cir.call @vabd_u8 +// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]]) + +// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) +// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]]) +// LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]] +// LLVM-NEXT: ret <8 x i8> [[ADD_I]] + return vaba_u8(v1, v2, v3); +} + +// LLVM-LABEL: @test_vaba_u16( +// CIR-LABEL: @vaba_u16( +uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { +// CIR: [[ABD:%.*]] = cir.call @vabd_u16 +// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]]) + +// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8> +// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) +// LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]] +// LLVM-NEXT: ret <4 x i16> [[ADD_I]] + return vaba_u16(v1, v2, v3); +} + +// LLVM-LABEL: @test_vaba_u32( +// CIR-LABEL: @vaba_u32( +uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { +// CIR: [[ABD:%.*]] = cir.call @vabd_u32 +// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]]) + +// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8> +// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) +// LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]] +// LLVM-NEXT: ret <2 x i32> [[ADD_I]] + return vaba_u32(v1, v2, v3); +} + +// LLVM-LABEL: @test_vaba_s8( +// CIR-LABEL: @vaba_s8( +int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { +// CIR: [[ABD:%.*]] = cir.call @vabd_s8 +// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]]) + +// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) +// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]]) +// LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]] +// LLVM-NEXT: ret <8 x i8> [[ADD_I]] + return vaba_s8(v1, v2, v3); +} + +// LLVM-LABEL: @test_vaba_s16( +// CIR-LABEL: @vaba_s16( +int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { +// CIR: [[ABD:%.*]] = cir.call @vabd_s16 +// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]]) + +// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) +// LLVM: [[TMP0:%.*]] 
= bitcast <4 x i16> [[V2]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8> +// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) +// LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]] +// LLVM-NEXT: ret <4 x i16> [[ADD_I]] + return vaba_s16(v1, v2, v3); +} + +// LLVM-LABEL: @test_vaba_s32( +// CIR-LABEL: @vaba_s32( +int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { +// CIR: [[ABD:%.*]] = cir.call @vabd_s32 +// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]]) + +// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8> +// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) +// LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]] +// LLVM-NEXT: ret <2 x i32> [[ADD_I]] + return vaba_s32(v1, v2, v3); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
