Author: Andrzej Warzyński
Date: 2026-01-11T14:14:27Z
New Revision: 363903eb3ed34f64164632266140ba3d609bdb81

URL: https://github.com/llvm/llvm-project/commit/363903eb3ed34f64164632266140ba3d609bdb81
DIFF: https://github.com/llvm/llvm-project/commit/363903eb3ed34f64164632266140ba3d609bdb81.diff

LOG: [CIR][AArch64] Add lowering for unpredicated svdup builtins (#174433)

This PR adds CIR lowering support for unpredicated `svdup` SVE builtins.
The corresponding ACLE intrinsics are documented at:

* https://developer.arm.com/architectures/instruction-sets/intrinsics
  (search for svdup).

Since LLVM provides a direct intrinsic for svdup with a 1:1 mapping, CIR
lowers these builtins by emitting a call to the corresponding LLVM
intrinsic.

DESIGN NOTES
------------
With this change, ACLE intrinsics that have a corresponding LLVM intrinsic
can generally be lowered by CIR by reusing the LLVM intrinsic metadata,
which avoids duplicating intrinsic-name definitions, unless codegen-relevant
`SVETypeFlags` are involved.

As a consequence, CIR may no longer emit NYI diagnostics for intrinsics that
(a) have a known LLVM intrinsic mapping and (b) do not use such
codegen-relevant `SVETypeFlags`; these intrinsics are lowered directly.

IMPLEMENTATION NOTES
--------------------
* Intrinsic discovery logic mirrors the approach in
  CodeGen/TargetBuiltins/ARM.cpp, but is simplified since CIR only requires
  the intrinsic name.
* Test inputs are copied from the existing svdup tests:
  clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c.
* The LLVM IR produced _with_ and _without_ `-fclangir` is identical, modulo
  basic block labels, SROA, and function attributes.

EXAMPLE LOWERING
----------------
Input:
```C
svint8_t test_svdup_n_s8(int8_t op) {
  return svdup_n_s8(op);
}
```

OUTPUT 1 (default):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 noundef %op) #0 {
entry:
  %op.addr = alloca i8, align 1
  store i8 %op, ptr %op.addr, align 1
  %0 = load i8, ptr %op.addr, align 1
  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %0)
  ret <vscale x 16 x i8> %1
}
```

OUTPUT 2 (via `-fclangir`):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 %0) #0 {
  %2 = alloca i8, i64 1, align 1
  %3 = alloca <vscale x 16 x i8>, i64 1, align 16
  store i8 %0, ptr %2, align 1
  %4 = load i8, ptr %2, align 1
  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %4)
  store <vscale x 16 x i8> %5, ptr %3, align 16
  %6 = load <vscale x 16 x i8>, ptr %3, align 16
  ret <vscale x 16 x i8> %6
}
```

Added:
    clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c

Modified:
    clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
    clang/lib/CIR/CodeGen/CIRGenFunction.h

Removed:


################################################################################

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index e28b3c6cdc2ff..7998fb6b5eaac 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -13,6 +13,7 @@ #include "CIRGenBuilder.h" #include "CIRGenFunction.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/CIR/MissingFeatures.h" // TODO(cir): once all builtins are covered, decide whether we still @@ -25,7 +26,6 @@ #include "mlir/IR/Value.h" #include "clang/AST/GlobalDecl.h" #include "clang/Basic/Builtins.h" -#include "clang/Basic/TargetBuiltins.h" using namespace clang; using namespace clang::CIRGen; @@ -52,6 +52,80 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc, builder.getUInt64(scalingFactor, loc)); } +static bool aarch64SVEIntrinsicsProvenSorted = false; + +namespace { +struct AArch64BuiltinInfo { + unsigned 
builtinID; + unsigned llvmIntrinsic; + uint64_t typeModifier; + + bool operator<(unsigned rhsBuiltinID) const { + return builtinID < rhsBuiltinID; + } + bool operator<(const AArch64BuiltinInfo &te) const { + return builtinID < te.builtinID; + } +}; +} // end anonymous namespace + +#define SVEMAP1(NameBase, llvmIntrinsic, TypeModifier) \ + {SVE::BI__builtin_sve_##NameBase, Intrinsic::llvmIntrinsic, TypeModifier} + +#define SVEMAP2(NameBase, TypeModifier) \ + {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier} +static const AArch64BuiltinInfo aarch64SVEIntrinsicMap[] = { +#define GET_SVE_LLVM_INTRINSIC_MAP +#include "clang/Basic/arm_sve_builtin_cg.inc" +#undef GET_SVE_LLVM_INTRINSIC_MAP +}; + +static const AArch64BuiltinInfo * +findARMVectorIntrinsicInMap(ArrayRef<AArch64BuiltinInfo> intrinsicMap, + unsigned builtinID, bool &mapProvenSorted) { + +#ifndef NDEBUG + if (!mapProvenSorted) { + assert(llvm::is_sorted(intrinsicMap)); + mapProvenSorted = true; + } +#endif + + const AArch64BuiltinInfo *info = llvm::lower_bound(intrinsicMap, builtinID); + + if (info != intrinsicMap.end() && info->builtinID == builtinID) + return info; + + return nullptr; +} + +bool CIRGenFunction::getAArch64SVEProcessedOperands( + unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> &ops, + SVETypeFlags typeFlags) { + // Find out if any arguments are required to be integer constant expressions. + unsigned iceArguments = 0; + ASTContext::GetBuiltinTypeError error; + getContext().GetBuiltinType(builtinID, error, &iceArguments); + assert(error == ASTContext::GE_None && "Should not codegen an error"); + + for (unsigned i = 0, e = expr->getNumArgs(); i != e; i++) { + bool isIce = iceArguments & (1 << i); + mlir::Value arg = emitScalarExpr(expr->getArg(i)); + + if (isIce) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + } + + // FIXME: Handle types like svint16x2_t, which are currently incorrectly + // converted to i32. These should be treated as structs and unpacked. 
+ + ops.push_back(arg); + } + return true; +} + std::optional<mlir::Value> CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, const CallExpr *expr) { @@ -65,8 +139,40 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, assert(!cir::MissingFeatures::aarch64SVEIntrinsics()); + auto *builtinIntrInfo = findARMVectorIntrinsicInMap( + aarch64SVEIntrinsicMap, builtinID, aarch64SVEIntrinsicsProvenSorted); + + // The operands of the builtin call + llvm::SmallVector<mlir::Value> ops; + + SVETypeFlags typeFlags(builtinIntrInfo->typeModifier); + if (!CIRGenFunction::getAArch64SVEProcessedOperands(builtinID, expr, ops, + typeFlags)) + return mlir::Value{}; + + if (typeFlags.isLoad() || typeFlags.isStore() || typeFlags.isGatherLoad() || + typeFlags.isScatterStore() || typeFlags.isPrefetch() || + typeFlags.isGatherPrefetch() || typeFlags.isStructLoad() || + typeFlags.isStructStore() || typeFlags.isTupleSet() || + typeFlags.isTupleGet() || typeFlags.isTupleCreate() || + typeFlags.isUndef()) + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + mlir::Location loc = getLoc(expr->getExprLoc()); + if (builtinIntrInfo->llvmIntrinsic != 0) { + std::string llvmIntrName(Intrinsic::getBaseName( + (llvm::Intrinsic::ID)builtinIntrInfo->llvmIntrinsic)); + + llvmIntrName.erase(0, /*std::strlen(".llvm")=*/5); + + return emitIntrinsicCallOp(builder, loc, llvmIntrName, + convertType(expr->getType()), + mlir::ValueRange{ops}); + } + switch (builtinID) { default: return std::nullopt; @@ -103,10 +209,12 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, case SVE::BI__builtin_sve_svpmullb_u64: case SVE::BI__builtin_sve_svpmullb_n_u16: case SVE::BI__builtin_sve_svpmullb_n_u64: + case SVE::BI__builtin_sve_svdup_n_b8: case SVE::BI__builtin_sve_svdup_n_b16: case SVE::BI__builtin_sve_svdup_n_b32: case SVE::BI__builtin_sve_svdup_n_b64: + case SVE::BI__builtin_sve_svdupq_n_b8: case SVE::BI__builtin_sve_svdupq_n_b16: case SVE::BI__builtin_sve_svdupq_n_b32: @@ -129,22 +237,27 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, std::string("unimplemented AArch64 builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); return mlir::Value{}; + case SVE::BI__builtin_sve_svlen_u8: case SVE::BI__builtin_sve_svlen_s8: return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 16); + case SVE::BI__builtin_sve_svlen_u16: case SVE::BI__builtin_sve_svlen_s16: case SVE::BI__builtin_sve_svlen_f16: case SVE::BI__builtin_sve_svlen_bf16: return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 8); + case SVE::BI__builtin_sve_svlen_u32: case SVE::BI__builtin_sve_svlen_s32: case SVE::BI__builtin_sve_svlen_f32: return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 4); + case SVE::BI__builtin_sve_svlen_u64: case SVE::BI__builtin_sve_svlen_s64: case SVE::BI__builtin_sve_svlen_f64: return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 2); + case SVE::BI__builtin_sve_svtbl2_u8: case SVE::BI__builtin_sve_svtbl2_s8: case SVE::BI__builtin_sve_svtbl2_u16: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 3101fc6cd228c..5fe1d9a4f2b76 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -31,6 +31,7 @@ #include "clang/AST/Stmt.h" #include "clang/AST/Type.h" #include "clang/Basic/OperatorKinds.h" +#include "clang/Basic/TargetBuiltins.h" #include 
"clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/MissingFeatures.h" #include "clang/CIR/TypeEvaluationKind.h" @@ -1265,6 +1266,9 @@ class CIRGenFunction : public CIRGenTypeCache { /// CIR emit functions /// ---------------------- public: + bool getAArch64SVEProcessedOperands(unsigned builtinID, const CallExpr *expr, + SmallVectorImpl<mlir::Value> &ops, + clang::SVETypeFlags typeFlags); std::optional<mlir::Value> emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, ReturnValueSlot returnValue, diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c new file mode 100644 index 0000000000000..3e0a892d6b368 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c @@ -0,0 +1,211 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR +#include <arm_sve.h> + +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// ALL-LABEL: @test_svdup_n_s8 +svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !s8i {{.*}} -> !cir.vector<[16] x !s8i> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s8i) -> !cir.vector<[16] x !s8i> + +// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1 +// LLVM_OGCG_CIR: store i8 [[OP]], ptr [[OP_ADDR]], align 1 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_s8,)(op); +} + +// ALL-LABEL: @test_svdup_n_s16 +svint16_t test_svdup_n_s16(int16_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !s16i {{.*}} -> !cir.vector<[8] x !s16i> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s16i) -> !cir.vector<[8] x !s16i> + +// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2 +// LLVM_OGCG_CIR: store i16 [[OP]], ptr [[OP_ADDR]], align 2 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_s16,)(op); +} + +// ALL-LABEL: @test_svdup_n_s32 +svint32_t test_svdup_n_s32(int32_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !s32i {{.*}} -> !cir.vector<[4] x !s32i> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s32i) -> !cir.vector<[4] x !s32i> + +// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4 +// LLVM_OGCG_CIR: store i32 [[OP]], ptr [[OP_ADDR]], align 4 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_s32,)(op); +} + +// ALL-LABEL: @test_svdup_n_s64 +svint64_t test_svdup_n_s64(int64_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !s64i {{.*}} -> !cir.vector<[2] x !s64i> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s64i) -> !cir.vector<[2] x !s64i> + +// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8 +// LLVM_OGCG_CIR: store i64 [[OP]], ptr [[OP_ADDR]], align 8 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_s64,)(op); +} + +// ALL-LABEL: @test_svdup_n_u8 +svuint8_t test_svdup_n_u8(uint8_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !u8i {{.*}} -> !cir.vector<[16] x 
!u8i> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u8i) -> !cir.vector<[16] x !u8i> + +// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1 +// LLVM_OGCG_CIR: store i8 [[OP]], ptr [[OP_ADDR]], align 1 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_u8,)(op); +} + +// ALL-LABEL: @test_svdup_n_u16 +svuint16_t test_svdup_n_u16(uint16_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !u16i {{.*}} -> !cir.vector<[8] x !u16i> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u16i) -> !cir.vector<[8] x !u16i> + +// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2 +// LLVM_OGCG_CIR: store i16 [[OP]], ptr [[OP_ADDR]], align 2 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_u16,)(op); +} + +// ALL-LABEL: @test_svdup_n_u32 +svuint32_t test_svdup_n_u32(uint32_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !u32i {{.*}} -> !cir.vector<[4] x !u32i> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u32i) -> !cir.vector<[4] x !u32i> + +// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4 +// LLVM_OGCG_CIR: store i32 [[OP]], ptr [[OP_ADDR]], align 4 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_u32,)(op); +} + +// ALL-LABEL: @test_svdup_n_u64 +svuint64_t test_svdup_n_u64(uint64_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !u64i {{.*}} -> !cir.vector<[2] x !u64i> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u64i) -> !cir.vector<[2] x !u64i> + +// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8 +// LLVM_OGCG_CIR: store i64 [[OP]], ptr [[OP_ADDR]], align 8 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_u64,)(op); +} + +// ALL-LABEL: @test_svdup_n_f16 +svfloat16_t test_svdup_n_f16(float16_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !cir.f16 {{.*}} -> !cir.vector<[8] x !cir.f16> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.f16) -> !cir.vector<[8] x !cir.f16> + +// 
LLVM_OGCG_CIR-SAME: half {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca half,{{([[:space:]]?i64 1,)?}} align 2 +// LLVM_OGCG_CIR: store half [[OP]], ptr [[OP_ADDR]], align 2 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load half, ptr [[OP_ADDR]], align 2 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_f16,)(op); +} + +// ALL-LABEL: @test_svdup_n_f32 +svfloat32_t test_svdup_n_f32(float32_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !cir.float {{.*}} -> !cir.vector<[4] x !cir.float> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.float) -> !cir.vector<[4] x !cir.float> + +// LLVM_OGCG_CIR-SAME: float {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca float,{{([[:space:]]?i64 1,)?}} align 4 +// LLVM_OGCG_CIR: store float [[OP]], ptr [[OP_ADDR]], align 4 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load float, ptr [[OP_ADDR]], align 4 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_f32,)(op); +} + +// ALL-LABEL: @test_svdup_n_f64 +svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR +{ +// CIR-SAME: %[[OP:.*]]: !cir.double {{.*}} -> !cir.vector<[2] x !cir.double> +// CIR: %[[ALLOCA:.*]] = cir.alloca +// CIR: cir.store %[[OP]], %[[ALLOCA]] +// CIR: %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]] +// CIR: cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.double) -> !cir.vector<[2] x !cir.double> + +// LLVM_OGCG_CIR-SAME: double {{(noundef)?[[:space:]]?}}[[OP:%.*]]) +// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca double,{{([[:space:]]?i64 1,)?}} align 8 +// LLVM_OGCG_CIR: store double [[OP]], ptr [[OP_ADDR]], align 8 +// LLVM_OGCG_CIR: [[OP_LOAD:%.*]] = load double, ptr [[OP_ADDR]], align 8 +// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double [[OP_LOAD]]) + return SVE_ACLE_FUNC(svdup,_n,_f64,)(op); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
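For readers less familiar with the lookup scheme described in the commit
message, the sketch below illustrates the two steps the new lowering relies
on: a binary search over a table sorted by builtin ID, and deriving the CIR
intrinsic name by stripping the `llvm.` prefix from the LLVM intrinsic base
name (`llvm.aarch64.sve.dup.x` -> `aarch64.sve.dup.x`). This is a standalone
C++ illustration, not LLVM code: the builtin IDs and the `BuiltinInfo` /
`stripLLVMPrefix` names are made up for the example, whereas the patch itself
uses `llvm::lower_bound`, `Intrinsic::getBaseName`, and `emitIntrinsicCallOp`.

```cpp
#include <algorithm>
#include <cassert>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for the generated AArch64BuiltinInfo entries; the
// real table is generated from clang/Basic/arm_sve_builtin_cg.inc.
struct BuiltinInfo {
  unsigned builtinID;       // e.g. SVE::BI__builtin_sve_svdup_n_s8
  std::string llvmBaseName; // e.g. "llvm.aarch64.sve.dup.x"
};

// Mirrors llvmIntrName.erase(0, 5) in the patch: drop the "llvm." prefix so
// the remainder can be used as the cir.call_llvm_intrinsic name.
static std::string stripLLVMPrefix(const std::string &baseName) {
  assert(baseName.rfind("llvm.", 0) == 0 && "expected an llvm.* base name");
  return baseName.substr(5); // strlen("llvm.") == 5
}

int main() {
  // The table must stay sorted by builtinID so the binary search is valid,
  // which is what the mapProvenSorted assertion in the patch checks.
  std::vector<BuiltinInfo> map = {
      {100, "llvm.aarch64.sve.dup.x"}, // IDs are made up for illustration
      {200, "llvm.aarch64.sve.tbl"},
  };

  unsigned builtinID = 100;
  auto it = std::lower_bound(
      map.begin(), map.end(), builtinID,
      [](const BuiltinInfo &info, unsigned id) { return info.builtinID < id; });

  if (it != map.end() && it->builtinID == builtinID)
    std::cout << stripLLVMPrefix(it->llvmBaseName) << "\n"; // aarch64.sve.dup.x
  return 0;
}
```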
