[clang] [Clang] Remove preprocessor guards and global feature checks for NEON (PR #95102)
@@ -8084,29 +8084,6 @@ static void HandleNeonVectorTypeAttr(QualType , const ParsedAttr , AuxTI && (AuxTI->getTriple().isAArch64() || AuxTI->getTriple().isARM()); } - // Target must have NEON (or MVE, whose vectors are similar enough - // not to need a separate attribute) - if (!(S.Context.getTargetInfo().hasFeature("neon") || -S.Context.getTargetInfo().hasFeature("mve") || -S.Context.getTargetInfo().hasFeature("sve") || -S.Context.getTargetInfo().hasFeature("sme") || -IsTargetCUDAAndHostARM) && - VecKind == VectorKind::Neon) { -S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) -<< Attr << "'neon', 'mve', 'sve' or 'sme'"; -Attr.setInvalid(); -return; - } - if (!(S.Context.getTargetInfo().hasFeature("neon") || momchil-velikov wrote: You can preserve the behaviour for MVE if you alter the diagnostics condition to be `"NEON type seen" && "no MVE" && "compiling for M-class".` https://github.com/llvm/llvm-project/pull/95102 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -118,6 +124,37 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) { return R; } +bool CodeGenTypes::LLVMTypeLayoutMatchesAST(QualType ASTTy, +llvm::Type *LLVMTy) { + CharUnits ASTSize = Context.getTypeSizeInChars(ASTTy); + CharUnits LLVMSize = + CharUnits::fromQuantity(getDataLayout().getTypeAllocSize(LLVMTy)); + return ASTSize == LLVMSize; +} + +llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T, + llvm::Type *LLVMTy) { + if (!LLVMTy) +LLVMTy = ConvertType(T); + + if (!T->isBitIntType() && LLVMTy->isIntegerTy(1)) +return llvm::IntegerType::get(getLLVMContext(), + (unsigned)Context.getTypeSize(T)); + + if (T->isBitIntType()) { +llvm::Type *R = ConvertType(T); +if (!LLVMTypeLayoutMatchesAST(T, R)) + return llvm::Type::getIntNTy( + getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8); momchil-velikov wrote: cf. https://github.com/llvm/llvm-project/pull/91364#issuecomment-2099384663 I'm fairly certain using load/store type of `iBITS` is the correct thing to do, unconditionally. Not quite sure about the choice between `iBITS` and `[BYTES x i8]`, if we're not talking about a load/stores how the array type could possibly be less efficient, so we don't default unconditionally to it? https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -118,6 +124,37 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) { return R; } +bool CodeGenTypes::LLVMTypeLayoutMatchesAST(QualType ASTTy, +llvm::Type *LLVMTy) { + CharUnits ASTSize = Context.getTypeSizeInChars(ASTTy); + CharUnits LLVMSize = + CharUnits::fromQuantity(getDataLayout().getTypeAllocSize(LLVMTy)); + return ASTSize == LLVMSize; +} + +llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T, + llvm::Type *LLVMTy) { + if (!LLVMTy) +LLVMTy = ConvertType(T); + + if (!T->isBitIntType() && LLVMTy->isIntegerTy(1)) +return llvm::IntegerType::get(getLLVMContext(), + (unsigned)Context.getTypeSize(T)); + + if (T->isBitIntType()) { +llvm::Type *R = ConvertType(T); +if (!LLVMTypeLayoutMatchesAST(T, R)) + return llvm::Type::getIntNTy( + getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8); momchil-velikov wrote: I don't understand why the choice of type is conditional. No matter of the "layout" matches or not, the stores have to produce valid in-memory representation (according to the target ABI), which in the general case means writing **all the** bits of the in-memory representation. https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (PR #88499)
https://github.com/momchil-velikov approved this pull request. https://github.com/llvm/llvm-project/pull/88499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
https://github.com/momchil-velikov approved this pull request. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -674,3 +674,26 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + +multiclass ZAReadz ch> { + let TargetGuard = "sme2p1" in { +def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t, + MergeNone, i_prefix # "_horiz_x" # vg_num, + [IsStreaming, IsInOutZA], ch>; momchil-velikov wrote: > Should these be `InZA` instead of `InOutZA`? And also `ReadZA` (not sure what > that's for). The instructions zero the source tiles after copying (https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/MOVAZ--tile-to-vector--four-registers---Move-and-zero-four-ZA-tile-slices-to-vector-registers-?lang=en ). https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -1989,6 +1989,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal(); } + if (const auto *BIT = Ty->getAs()) { +if (BIT->getNumBits() > 128) { + // Long _BitInt has array of bytes as in-memory type. + llvm::Type *NewTy = ConvertType(Ty); momchil-velikov wrote: Oh, I see. It looks close to what we are trying to do with https://github.com/llvm/llvm-project/pull/93495, which is: * create in-memory representations according to the target ABI * improve efficiency of loads/stores, e.g. load/store of `i18` in LLVM must touch just 3 bytes, so a compiler would emit one 16-bit load and one 8-bit load, but if `i18` comes from `_BitInt(18)` then a single 32-bit load would work better. https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -1989,6 +1989,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal(); } + if (const auto *BIT = Ty->getAs()) { +if (BIT->getNumBits() > 128) { + // Long _BitInt has array of bytes as in-memory type. + llvm::Type *NewTy = ConvertType(Ty); momchil-velikov wrote: Shouldn't we call calling `ConvertTypeForMem` here? https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][ARM]: Fix Inefficient loads/stores of _BitInt(N) (PR #93495)
@@ -2021,6 +2028,12 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) && "wrong value rep of bool"); } + if (auto *BitIntTy = Ty->getAs()) { +if (CGM.getTarget().isBitIntSignExtended(BitIntTy->isSigned())) momchil-velikov wrote: We might be introducing changes that are not desirable (or correct) for non-Arm targets. Instead, I would suggest to add a description of the in-memory padding for the `_BitInt` types (e.g. via a member function in `TargetInfo`). One reasonable approach is lilke this: ``` enum class TargetBitInitPaddingKind { None, ZeroOrSignExtend, AnyExtend }; ``` where * `None` will be the default and will result in identical code as the one that Clang generates now, i.e. no `sext` or `zext`, load/stores use LLVM type `iN` for `_BitInt(N)`. * `ZeroOrSignExtend` would mean in-memory representation is padded with 0 for `unsigned _BitInt(N)` and with the sign bit for `signed _BitInt(N)`. This will be the value for AArch32 * `AnyExtend` would mean in-memory representation is padded with unspecified bits. This will be the value for AArch64. Since AFAIK we don't have such an operation in LLVM IR, one way to implement this would be identically to `ZeroOrSignExtend` or, alternatively, do zero-extend regardless of the signedness of the `_BitInt(N)` type. https://github.com/llvm/llvm-project/pull/93495 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][ARM]: Fix Inefficient loads/stores of _BitInt(N) (PR #93495)
@@ -221,6 +221,16 @@ bool AArch64TargetInfo::validateTarget(DiagnosticsEngine ) const { return true; } +unsigned AArch64TargetInfo::getBitIntLegalWidth(unsigned Width) const { momchil-velikov wrote: This function is likely unnecessary (also it's incorrect in a couple of ways[1]). I think you should be using instead `ASTConext::getTypeInfo(T).Width`. [1] Representation might be in less than 32-bits (could be also 8 or 16) and `_BitInt(N), N > 128` is not `N` bits wide, it's in multiples of `i128`. https://github.com/llvm/llvm-project/pull/93495 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Fix feature flags dependecies (PR #90612)
momchil-velikov wrote: > > This patch removes FEAT_FPMR from list of available of architecture > > features, instead enabling FMPR register by default. > > Can you expand a little bit on the reasoning? It doesn't seem all that > problematic but is still eyebrow-raising. The overall idea is that system registers ought be available everywhere without the need to explicitly enable them with a command line option. Since `FEAT_FPMR` has no function other than enabling the register and it is going to be enabled by default, having a command line option, predicate, feature definition, etc becomes pointless. The FP8 instructions themselves are still guarded by a target feature. https://github.com/llvm/llvm-project/pull/90612 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
momchil-velikov wrote: Rebased. https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91965 >From b1b69ffcaf4525a66dde1ae7f1a022c85204a579 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 16:25:43 +0100 Subject: [PATCH 1/2] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`. Change-Id: I46d0333d8ed8508cd9cd23e02dd1c2d48fb74cd2 --- clang/lib/CodeGen/CGExpr.cpp | 2 +- clang/lib/Sema/SemaExpr.cpp | 2 +- clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c | 10 ++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index cd1c48b420382..6f9237e2067f5 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4180,7 +4180,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && + if (E->getBase()->getType()->isSubscriptableVectorType() && !isa(E->getBase())) { // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 326879b0883fa..49541edf106e1 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5185,7 +5185,7 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } // Perform default conversions. - if (!LHSExp->getType()->getAs()) { + if (!LHSExp->getType()->isSubscriptableVectorType()) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index fb60c6d100ce6..634423765c4c3 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; + return a; +} >From 08a155b49a6b9c859ba8569170e0f71e63b76735 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 16:26:06 +0100 Subject: [PATCH 2/2] [fixiup] Add a test using compound assignment operator Change-Id: I81e1fd4f23eb65a96e71015de7a4562fcbc53c0f --- .../test/CodeGen/aarch64-sve-vector-subscript-ops.c | 12 1 file changed, 12 insertions(+) diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index 634423765c4c3..52a05d010de9b 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -98,3 +98,15 @@ svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { a[b] = 1.0f; return a; } + +// CHECK-LABEL: @subscript_read_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECEXT:%.*]] = extractelement [[A:%.*]], i64 [[B:%.*]] +// CHECK-NEXT:[[ADD:%.*]] = fadd float [[VECEXT]], 1.00e+00 +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A]], float [[ADD]], i64 [[B]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_read_write_float32(svfloat32_t a, size_t b) { + a[b] += 1.0f; + return a; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) (PR #92778)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/92778 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) (PR #92778)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/92778 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
@@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { momchil-velikov wrote: Split out to https://github.com/llvm/llvm-project/pull/92778 https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) (PR #92778)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/92778 None >From 435f3104e68ef278196417c293093131258c549d Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 15:43:31 +0100 Subject: [PATCH] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) Change-Id: I514431a482ffa0a2d906c019b6e374bf4607571e --- clang/include/clang/AST/Type.h | 5 clang/lib/Sema/SemaExpr.cpp| 44 +++--- 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index da3834f19ca04..9a5c6e8d562c3 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2523,6 +2523,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isVectorType() const;// GCC vector type. bool isExtVectorType() const; // Extended vector type. bool isExtVectorBoolType() const; // Extended vector type with bool element. + bool isSubscriptableVectorType() const; bool isMatrixType() const;// Matrix type. bool isConstantMatrixType() const;// Constant matrix type. bool isDependentAddressSpaceType() const; // value-dependent address space qualifier @@ -7729,6 +7730,10 @@ inline bool Type::isExtVectorBoolType() const { return cast(CanonicalType)->getElementType()->isBooleanType(); } +inline bool Type::isSubscriptableVectorType() const { + return isVectorType() || isSveVLSBuiltinType(); +} + inline bool Type::isMatrixType() const { return isa(CanonicalType); } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 5ecfdee21f09d..c86f1d9c8076e 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5283,36 +5283,22 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, << ResultType << BaseExpr->getSourceRange(); return ExprError(); } - } else if (const VectorType *VTy = LHSTy->getAs()) { -BaseExpr = LHSExp;// vectors: V[123] -IndexExpr = RHSExp; -// We apply C++ DR1213 to vector subscripting too. -if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { - ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); - if (Materialized.isInvalid()) -return ExprError(); - LHSExp = Materialized.get(); + } else if (LHSTy->isSubscriptableVectorType()) { +if (LHSTy->isBuiltinType() && +LHSTy->getAs()->isSveVLSBuiltinType()) { + const BuiltinType *BTy = LHSTy->getAs(); + if (BTy->isSVEBool()) +return ExprError(Diag(LLoc, diag::err_subscript_svbool_t) + << LHSExp->getSourceRange() + << RHSExp->getSourceRange()); + ResultType = BTy->getSveEltType(Context); +} else { + const VectorType *VTy = LHSTy->getAs(); + ResultType = VTy->getElementType(); } -VK = LHSExp->getValueKind(); -if (VK != VK_PRValue) - OK = OK_VectorComponent; - -ResultType = VTy->getElementType(); -QualType BaseType = BaseExpr->getType(); -Qualifiers BaseQuals = BaseType.getQualifiers(); -Qualifiers MemberQuals = ResultType.getQualifiers(); -Qualifiers Combined = BaseQuals + MemberQuals; -if (Combined != MemberQuals) - ResultType = Context.getQualifiedType(ResultType, Combined); - } else if (LHSTy->isBuiltinType() && - LHSTy->getAs()->isSveVLSBuiltinType()) { -const BuiltinType *BTy = LHSTy->getAs(); -if (BTy->isSVEBool()) - return ExprError(Diag(LLoc, diag::err_subscript_svbool_t) - << LHSExp->getSourceRange() << RHSExp->getSourceRange()); - -BaseExpr = LHSExp; +BaseExpr = LHSExp; // vectors: V[123] IndexExpr = RHSExp; +// We apply C++ DR1213 to vector subscripting too. if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); if (Materialized.isInvalid()) @@ -5323,8 +5309,6 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, if (VK != VK_PRValue) OK = OK_VectorComponent; -ResultType = BTy->getSveEltType(Context); - QualType BaseType = BaseExpr->getType(); Qualifiers BaseQuals = BaseType.getQualifiers(); Qualifiers MemberQuals = ResultType.getQualifiers(); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91965 >From 435f3104e68ef278196417c293093131258c549d Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 15:43:31 +0100 Subject: [PATCH 1/3] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) Change-Id: I514431a482ffa0a2d906c019b6e374bf4607571e --- clang/include/clang/AST/Type.h | 5 clang/lib/Sema/SemaExpr.cpp| 44 +++--- 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index da3834f19ca04..9a5c6e8d562c3 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2523,6 +2523,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isVectorType() const;// GCC vector type. bool isExtVectorType() const; // Extended vector type. bool isExtVectorBoolType() const; // Extended vector type with bool element. + bool isSubscriptableVectorType() const; bool isMatrixType() const;// Matrix type. bool isConstantMatrixType() const;// Constant matrix type. bool isDependentAddressSpaceType() const; // value-dependent address space qualifier @@ -7729,6 +7730,10 @@ inline bool Type::isExtVectorBoolType() const { return cast(CanonicalType)->getElementType()->isBooleanType(); } +inline bool Type::isSubscriptableVectorType() const { + return isVectorType() || isSveVLSBuiltinType(); +} + inline bool Type::isMatrixType() const { return isa(CanonicalType); } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 5ecfdee21f09d..c86f1d9c8076e 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5283,36 +5283,22 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, << ResultType << BaseExpr->getSourceRange(); return ExprError(); } - } else if (const VectorType *VTy = LHSTy->getAs()) { -BaseExpr = LHSExp;// vectors: V[123] -IndexExpr = RHSExp; -// We apply C++ DR1213 to vector subscripting too. -if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { - ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); - if (Materialized.isInvalid()) -return ExprError(); - LHSExp = Materialized.get(); + } else if (LHSTy->isSubscriptableVectorType()) { +if (LHSTy->isBuiltinType() && +LHSTy->getAs()->isSveVLSBuiltinType()) { + const BuiltinType *BTy = LHSTy->getAs(); + if (BTy->isSVEBool()) +return ExprError(Diag(LLoc, diag::err_subscript_svbool_t) + << LHSExp->getSourceRange() + << RHSExp->getSourceRange()); + ResultType = BTy->getSveEltType(Context); +} else { + const VectorType *VTy = LHSTy->getAs(); + ResultType = VTy->getElementType(); } -VK = LHSExp->getValueKind(); -if (VK != VK_PRValue) - OK = OK_VectorComponent; - -ResultType = VTy->getElementType(); -QualType BaseType = BaseExpr->getType(); -Qualifiers BaseQuals = BaseType.getQualifiers(); -Qualifiers MemberQuals = ResultType.getQualifiers(); -Qualifiers Combined = BaseQuals + MemberQuals; -if (Combined != MemberQuals) - ResultType = Context.getQualifiedType(ResultType, Combined); - } else if (LHSTy->isBuiltinType() && - LHSTy->getAs()->isSveVLSBuiltinType()) { -const BuiltinType *BTy = LHSTy->getAs(); -if (BTy->isSVEBool()) - return ExprError(Diag(LLoc, diag::err_subscript_svbool_t) - << LHSExp->getSourceRange() << RHSExp->getSourceRange()); - -BaseExpr = LHSExp; +BaseExpr = LHSExp; // vectors: V[123] IndexExpr = RHSExp; +// We apply C++ DR1213 to vector subscripting too. if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); if (Materialized.isInvalid()) @@ -5323,8 +5309,6 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, if (VK != VK_PRValue) OK = OK_VectorComponent; -ResultType = BTy->getSveEltType(Context); - QualType BaseType = BaseExpr->getType(); Qualifiers BaseQuals = BaseType.getQualifiers(); Qualifiers MemberQuals = ResultType.getQualifiers(); >From 7fc3ff1758fa424bdbea3c847aede260f7598814 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 16:25:43 +0100 Subject: [PATCH 2/3] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`.
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2939,59 +2922,18 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask; switch (SMEMatrixType) { case (AArch64::SMEMatrixArray): - return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false, - /*HasZPROut*/ false); + return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB); case (AArch64::SMEMatrixTileB): - switch (MI.getOpcode()) { - case AArch64::MOVAZ_2ZMI_H_B_PSEUDO: - case AArch64::MOVAZ_2ZMI_V_B_PSEUDO: - case AArch64::MOVAZ_4ZMI_H_B_PSEUDO: - case AArch64::MOVAZ_4ZMI_V_B_PSEUDO: -return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, - /*HasTile*/ true, /*HasZPROut*/ true); - default: -return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, - /*HasTile*/ true, /*HasZPROut*/ false); - } + return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB); case (AArch64::SMEMatrixTileH): - switch (MI.getOpcode()) { - case AArch64::MOVAZ_2ZMI_H_H_PSEUDO: - case AArch64::MOVAZ_2ZMI_V_H_PSEUDO: - case AArch64::MOVAZ_4ZMI_H_H_PSEUDO: - case AArch64::MOVAZ_4ZMI_V_H_PSEUDO: -return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, - /*HasTile*/ true, /*HasZPROut*/ true); - default: -return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, - /*HasTile*/ true, /*HasZPROut*/ false); - } + return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB); + ///*HasTile*/ true, /*HasZPROut*/ false); momchil-velikov wrote: Stray comment. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
momchil-velikov wrote: ``` if (HasTile) { MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); MIB.addReg(BaseReg + MI.getOperand(0).getImm()); StartIdx = 1; } else MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); } ``` Needs extra braces around the `else` clause, https://llvm.org/docs/CodingStandards.html#don-t-use-braces-on-simple-single-statement-bodies-of-if-else-loop-statements cf. ``` // Use braces for the `if` block to keep it uniform with the `else` block. if (isa(D)) { handleFunctionDecl(D); } else { // In this `else` case, it is necessary that we explain the situation with // this surprisingly long comment, so it would be unclear without the braces // whether the following statement is in the scope of the `if`. handleOtherDecl(D); } ``` https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2883,19 +2883,28 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr , MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, - MachineInstr , - MachineBasicBlock *BB, bool HasTile) const { + MachineInstr , MachineBasicBlock *BB, + bool HasTile, bool HasZPROut) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); unsigned StartIdx = 0; - if (HasTile) { -MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); -MIB.addReg(BaseReg + MI.getOperand(0).getImm()); -StartIdx = 1; - } else -MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); - + if (HasZPROut) { momchil-velikov wrote: Looks good with the last change. Still can further simplify and make it more readable like in the snippet above. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
@@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -verify -emit-llvm %s momchil-velikov wrote: Thanks! https://github.com/llvm/llvm-project/pull/91606 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/91606 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
@@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91965 >From fd4a31c1eb48db410f5445f45243dfbc1d9d22ab Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 13 May 2024 14:27:51 +0100 Subject: [PATCH 1/2] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`. --- clang/lib/CodeGen/CGExpr.cpp | 6 -- clang/lib/Sema/SemaExpr.cpp | 4 +++- clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c | 10 ++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d96c7bb1e5682..37b8b723937b7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e0aae6333e1a1..f3983a3cbefb1 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5227,7 +5227,9 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } // Perform default conversions. - if (!LHSExp->getType()->getAs()) { + if (!LHSExp->getType()->getAs() && + !(LHSExp->getType()->isBuiltinType() && +LHSExp->getType()->getAs()->isSveVLSBuiltinType())) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index fb60c6d100ce6..634423765c4c3 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; + return a; +} >From fec051ff91df9cc8fca4d0571fe77a18cfb58072 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 17 May 2024 13:20:18 +0100 Subject: [PATCH 2/2] [fixiup] Add a test using compound assignment operator Change-Id: I81e1fd4f23eb65a96e71015de7a4562fcbc53c0f --- .../test/CodeGen/aarch64-sve-vector-subscript-ops.c | 12 1 file changed, 12 insertions(+) diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index 634423765c4c3..52a05d010de9b 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -98,3 +98,15 @@ svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { a[b] = 1.0f; return a; } + +// CHECK-LABEL: @subscript_read_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECEXT:%.*]] = extractelement [[A:%.*]], i64 [[B:%.*]] +// CHECK-NEXT:[[ADD:%.*]] = fadd float [[VECEXT]], 1.00e+00 +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A]], float [[ADD]], i64 [[B]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_read_write_float32(svfloat32_t a, size_t b) { + a[b] += 1.0f; + return a; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
@@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { momchil-velikov wrote: AFAICT here https://github.com/llvm/llvm-project/blob/371eccd5dfed88c8e76449233d8388c12be3464b/clang/lib/Sema/SemaExpr.cpp#L5307 we are enabling array subscripts for SVE only. Perhaps we can be generalised to any size-less vector type, in a followup patch, @jacquesguan , what do you think? https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91965 >From 2e081d74e87ad14fdf6d950d3e3da6bed07ee723 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 13 May 2024 14:27:51 +0100 Subject: [PATCH] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`. --- clang/lib/CodeGen/CGExpr.cpp | 6 -- clang/lib/Sema/SemaExpr.cpp | 4 +++- clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c | 10 ++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d96c7bb1e5682..37b8b723937b7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index bb4b116fd73ca..fd16be30bd848 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5383,7 +5383,9 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } // Perform default conversions. - if (!LHSExp->getType()->getAs()) { + if (!LHSExp->getType()->getAs() && + !(LHSExp->getType()->isBuiltinType() && +LHSExp->getType()->getAs()->isSveVLSBuiltinType())) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index fb60c6d100ce6..634423765c4c3 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; + return a; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/91965 The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertelement`. >From 2e081d74e87ad14fdf6d950d3e3da6bed07ee723 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 13 May 2024 14:27:51 +0100 Subject: [PATCH] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`. --- clang/lib/CodeGen/CGExpr.cpp | 6 -- clang/lib/Sema/SemaExpr.cpp | 4 +++- clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c | 10 ++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d96c7bb1e5682..37b8b723937b7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index bb4b116fd73ca..fd16be30bd848 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5383,7 +5383,9 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } // Perform default conversions. - if (!LHSExp->getType()->getAs()) { + if (!LHSExp->getType()->getAs() && + !(LHSExp->getType()->isBuiltinType() && +LHSExp->getType()->getAs()->isSveVLSBuiltinType())) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index fb60c6d100ce6..634423765c4c3 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; + return a; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91606 >From 43fb20b7492307740c437e85c3f73af068d093cf Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 9 May 2024 15:56:31 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (#88266) According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 7808ee559932e..80e635e4a57ec 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 0..9a8aa448d3780 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +//
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91606 >From d3e381ac645d08b6f3b01283d47344556a163605 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 9 May 2024 15:56:31 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (#88266) According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 7808ee559932e..80e635e4a57ec 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 0..9a8aa448d3780 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +//
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for 16-bit non-widening FMLA/FMLS (PR #88553)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/88553 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for bfloat16 min/max/minnm/maxnm (PR #90105)
https://github.com/momchil-velikov commented: LGTM, cheers! https://github.com/llvm/llvm-project/pull/90105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 8a63b17711d36cfeb4aab591853163119f5f167d Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/4] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea2..a18a5094a15e 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index ..40fcad6a5764 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/91606 [Recommit of e88ba6d975d887ca001cae30bfa0c53d91165148] According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. >From fc45f19cddc7b2dee55f53a2c464d5819f06a0ad Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 9 May 2024 15:56:31 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (#88266) According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea28..000bd97a4b25d 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 0..9a8aa448d3780 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
momchil-velikov wrote: > Thanks for the quick revert! > > Is the failure due to a conflict with another commit that landed? Perhaps, e.g. https://github.com/llvm/llvm-project/pull/91140 https://github.com/llvm/llvm-project/pull/88266 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" (PR #91597)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/91597 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" (PR #91597)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/91597 Reverts llvm/llvm-project#88266 due to test failures error: 'expected-error' diagnostics seen but not expected: (frontend): '-fsyntax-only' action ignored; '-emit-llvm' action specified previously >From 0f71196108d1c3c1bb44305a3a8392f406ae71e9 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 9 May 2024 15:01:53 +0100 Subject: [PATCH] =?UTF-8?q?Revert=20"[AArch64]=20Add=20intrinsics=20for=20?= =?UTF-8?q?multi-vector=20to=20ZA=20array=20vector=20accumula=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e88ba6d975d887ca001cae30bfa0c53d91165148. --- clang/include/clang/Basic/arm_sme.td | 10 - .../acle_sme2_add_sub_za16.c | 193 -- .../acle_sme2_add_sub_za16.c | 29 --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 -- 6 files changed, 9 insertions(+), 389 deletions(-) delete mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c delete mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c delete mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 000bd97a4b25d..1ac6d5170ea28 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,16 +298,6 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } - - let TargetGuard = "sme-f16f16|sme-f8f16" in { -def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; -def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; - } - - let TargetGuard = "sme2,b16b16" in { -def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; -def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; - } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c deleted file mode 100644 index d98427fac610b..0 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c +++ /dev/null @@ -1,193 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 -#endif - -// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/88266 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2883,19 +2883,28 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr , MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, - MachineInstr , - MachineBasicBlock *BB, bool HasTile) const { + MachineInstr , MachineBasicBlock *BB, + bool HasTile, bool HasZPROut) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); unsigned StartIdx = 0; - if (HasTile) { -MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); -MIB.addReg(BaseReg + MI.getOperand(0).getImm()); -StartIdx = 1; - } else -MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); - + if (HasZPROut) { momchil-velikov wrote: I think it can be made a bit more clear and less verbose if we separate the conditions and use `StartIdx` to track how many of the input operands we have consumes, something like: ``` unsigned StartIdx = 0; if (HasGPROut) { MIB.add(MI.getOperand(0)); // Output ZPR ++StartIdx; } if (HasTile) { MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(), RegState::Define); // Output ZA Tile MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm()); // Input Za Tile ++StartIdx; } else { MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); } ``` https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2883,19 +2883,28 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr , MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, - MachineInstr , - MachineBasicBlock *BB, bool HasTile) const { + MachineInstr , MachineBasicBlock *BB, + bool HasTile, bool HasZPROut) const { momchil-velikov wrote: I'm wondering would it be possible to remove *both* `bool` parameters and instead infer their value in the function itself. Maybe like this: ``` bool HasTile = BaseReg != AArch64::ZA; bool HasZPROut = HasTile && MI.getOperand(0).isReg(); ``` https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2930,17 +2939,59 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask; switch (SMEMatrixType) { case (AArch64::SMEMatrixArray): - return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false); + return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false, momchil-velikov wrote: We the changes from the comment above we can remove all the `bool` arguments and, most importantly, all those opcodes. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for bflaot16 min/max/minnm/maxnm (PR #90105)
momchil-velikov wrote: Typo in commit message: `bflaot16` > Variations other than bfloat16 had been already supported. -> Variations other than bfloat16 are already supported. https://github.com/llvm/llvm-project/pull/90105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 74ee4857a76bc7eb5353dc22311e766ec5356514 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/3] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88266 >From cafe0a8b70ad0189b638ec377e7d8cba9e786ecb Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 10 Apr 2024 11:25:50 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..000bd97a4b25d5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 00..d98427fac610b8 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT:ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t( +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]])
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 3ea7ee0aaf7f8be8c2ee42af92ba3b13b8212645 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/3] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 3ea7ee0aaf7f8be8c2ee42af92ba3b13b8212645 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/2] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88266 >From 09167c5df2b50476a5073ff2e527503d090e7995 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 10 Apr 2024 11:25:50 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..000bd97a4b25d5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 00..d98427fac610b8 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT:ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t( +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]])
[clang] [llvm] [AArch64] Add intrinsics for bflaot16 min/max/minnm/maxnm (PR #90105)
@@ -3373,7 +3373,7 @@ let TargetPrefix = "aarch64" in { // Multi-vector min/max // - foreach ty = ["f", "s", "u"] in { + foreach ty = ["bf", "f", "s", "u"] in { momchil-velikov wrote: You could just omit that part. Then the `bfloat` intrinsics would use `fmin`/`fmax`/etc in the names without ambiguity, since they are polymorphic. https://github.com/llvm/llvm-project/pull/90105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for bflaot16 min/max/minnm/maxnm (PR #90105)
@@ -3387,7 +3387,7 @@ let TargetPrefix = "aarch64" in { // Multi-vector floating point min/max number // - foreach instr = ["fmaxnm", "fminnm"] in { + foreach instr = ["fmaxnm", "bfmaxnm", "fminnm", "bfminnm"] in { momchil-velikov wrote: Likewise here. https://github.com/llvm/llvm-project/pull/90105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (PR #88499)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/88499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (PR #88499)
@@ -104,6 +104,13 @@ class sme2_move_to_tile_pseudo +: SMEPseudo2Instr, momchil-velikov wrote: This is not needed. https://github.com/llvm/llvm-project/pull/88499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (PR #88499)
@@ -2832,6 +2832,23 @@ AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg, return BB; } +MachineBasicBlock * +AArch64TargetLowering::EmitTileMovaz(unsigned Opc, unsigned BaseReg, momchil-velikov wrote: This function looks almost identical to `EmitZAInstr`. It looks to me you can reuse `EmitZAInstr` (with a couple of small modifications) and then employ the `SMEPseudo2Instr` technique. Then you won't need the switch cases starting at line 3012. https://github.com/llvm/llvm-project/pull/88499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for 16-bit non-widening FMLA/FMLS (PR #88553)
@@ -458,6 +458,40 @@ let TargetGuard = "sme2,sme-f64f64" in { def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; } +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; + def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; + def SVMLS_MULTI_VG1x2_F16 : Inst<"svmls_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; + def SVMLS_MULTI_VG1x4_F16 : Inst<"svmls_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsInOutZA], []>; + + def SVMLA_SINGLE_VG1x2_F16 : Inst<"svmla[_single]_za16[_f16]_vg1x2", "vm2d", "h", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsInOutZA], []>; + def SVMLA_SINGLE_VG1x4_F16 : Inst<"svmla[_single]_za16[_f16]_vg1x4", "vm4d", "h", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsInOutZA], []>; + def SVMLS_SINGLE_VG1x2_F16 : Inst<"svmls[_single]_za16[_f16]_vg1x2", "vm2d", "h", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsInOutZA], []>; + def SVMLS_SINGLE_VG1x4_F16 : Inst<"svmls[_single]_za16[_f16]_vg1x4", "vm4d", "h", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsInOutZA], []>; + + def SVMLA_LANE_VG1x2_F16 : Inst<"svmla_lane_za16[_f16]_vg1x2", "vm2di", "h", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>; + def SVMLA_LANE_VG1x4_F16 : Inst<"svmla_lane_za16[_f16]_vg1x4", "vm4di", "h", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>; + def SVMLS_LANE_VG1x2_F16 : Inst<"svmls_lane_za16[_f16]_vg1x2", "vm2di", "h", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>; + def SVMLS_LANE_VG1x4_F16 : Inst<"svmls_lane_za16[_f16]_vg1x4", "vm4di", "h", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>; +} + +let TargetGuard = "sme2,b16b16" in { momchil-velikov wrote: Arm ARM, version K.a (March 2024) (https://developer.arm.com/documentation/ddi0487/ka ), page A2-173 > If FEAT_SVE_B16B16 is implemented, then FEAT_SME2 or FEAT_SVE2 is implemented. https://github.com/llvm/llvm-project/pull/88553 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -1985,6 +1986,34 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } +template +void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, momchil-velikov wrote: The real question is why is this is a function template or, if you want, why `AArch64DAGToDAGISel::SelectMultiVectorMove` is a function template? Both the template parameters do not affect any type, so we aren't benefiting from any kind of parametric polymorphism, the parameters themselves are only passed as ordinary parameters to `SelectSMETileSlice` and as such can't participate in any constant folding that would warrant multiple instantiations (each of which is essentially a specialisation of the function). IMHO, it's OK to have the `Scale` and `NumVecs` separate since `SelectMultiVectorMove` is a hint we may need it some day. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -286,14 +286,26 @@ multiclass sme_outer_product_fp64 def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } -multiclass sme2p1_fmop_tile_fp16 op, ZPRRegOp zpr_ty>{ - def NAME : sme_fp_outer_product_inst { +multiclass sme2p1_fmop_tile_f8f16 op> { + def NAME : sme_fp_outer_product_inst { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; } } +multiclass sme2p1_fmop_tile_fp16 op, ValueType vt, SDPatternOperator intrinsic = null_frag> { + def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 2b0befb9078f8c9116ad52be937c8722045708ef Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/2] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for 16-bit non-widening FMLA/FMLS (PR #88553)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/88553 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { momchil-velikov wrote: Done. The Clang instrinsics use the same target features as the underlying assembly instructions. If the features on the assembly instruction are not entirely correct we should fix it, but in a separate patch. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -815,8 +815,8 @@ defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; -defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; -defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; +defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, int_aarch64_sme_mopa_nonwide>; momchil-velikov wrote: Discussed offline. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mops_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mopa_nonwide", momchil-velikov wrote: In fact we can reuse the existing `aarch64_sme_mopa` which is used for other non-widening operations and since they are polymorphic and non-widening the instantiation type is enough to disambiguate the operation. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", momchil-velikov wrote: Done https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 2b0befb9078f8c9116ad52be937c8722045708ef Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/88266 According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. >From 2b0557d4a62476b827352b6775588cef15cecd33 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 10 Apr 2024 11:25:50 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators --- clang/include/clang/Basic/arm_sme.td | 11 + .../acle_sme2_add_sub_za16.c | 191 ++ .../acle_sme2_add_sub_za16.c | 26 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 146 + 6 files changed, 384 insertions(+), 10 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..dcfaefa7a3e266 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,17 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme2p1,sme-f16f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2p1,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 00..bdf07f86b9c93d --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,191 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call
[clang] [llvm] [AArch64][SME] Add intrinsics for multi-vector BFCLAMP (PR #88251)
@@ -2148,6 +2148,11 @@ let TargetGuard = "sme2" in { def SVSCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "csil", MergeNone, "aarch64_sve_sclamp_single_x4", [IsStreaming], []>; def SVUCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp_single_x4", [IsStreaming], []>; def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>; + + let TargetGuard = "b16b16"in { momchil-velikov wrote: I does not work. A test for target features (in `Sema`) would catch that. https://github.com/llvm/llvm-project/pull/88251 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
momchil-velikov wrote: > I noticed that file names and file location are using sme2 as prefix. > Shouldn't we use sme2p1 prefix for this intrinsic ? None of instructions seem to require `FEAT_SME2p1`: https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPA--non-widening---BFloat16-floating-point-outer-product-and-accumulate-?lang=en https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPS--non-widening---BFloat16-floating-point-outer-product-and-subtract-?lang=en https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPA--non-widening---Floating-point-outer-product-and-accumulate-?lang=en https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPS--non-widening---Floating-point-outer-product-and-subtract-?lang=en https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/88105 According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. >From ee78ad565158c2d1301265415992511ea559e7a6 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..e60a400b094850 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mops_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mopa_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mops_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..36a75609534653 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] [SVE] Created intrinsics for DUPQ instr. (PR #83260)
@@ -10007,6 +10007,16 @@ multiclass sve2p1_dupq { bits<1> index; let Inst{20} = index; } + + def : SVE_2_Op_Imm_Pat(NAME # _B)>; momchil-velikov wrote: Change them to `_timm`. https://github.com/llvm/llvm-project/pull/83260 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [Clang][LLVM][AArch64]SVE2.1 update the intrinsics according to acle[1] (PR #76844)
https://github.com/momchil-velikov approved this pull request. https://github.com/llvm/llvm-project/pull/76844 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [TargetParser] Define AEK_FCMA and AEK_JSCVT for tsv110 (PR #75516)
@@ -81,6 +81,15 @@ static bool DecodeAArch64Features(const Driver , StringRef text, else return false; +// +jsconv and +complxnum implies +neon and +fp-armv8 momchil-velikov wrote: According to the latest Arm ARM (https://developer.arm.com/documentation/ddi0487/ja/?lang=en) the architecrtural extensions `FEAT_FCMA` and `FEAT_JSCVT` are mandatory in Armv8.3-a and are not optional in any architecture version. For such features, our convention is to not expose them as command-line options. https://github.com/llvm/llvm-project/pull/75516 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
@@ -2086,7 +2086,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
@@ -2086,7 +2086,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { momchil-velikov wrote: Yeah, I'll add the `IsStreamingCompatible` flag. What needs to be updated in tests? https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
momchil-velikov wrote: Rebased the clear the test run. https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75596 >From 04a03eae3fcbdd57257ce3867615ec6be9d84e53 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 15 Dec 2023 12:18:53 + Subject: [PATCH 1/2] [AArch64] Update target feature requirements of SVE bfloat instructions According to the latest update of the ISA https://developer.arm.com/documentation/ddi0602/2023-09/?lang=en all of the affected instruction encodings now require (FEAT_SVE2 or FEAT_SME2) and FEAT_SVE_B16B16 --- clang/include/clang/Basic/arm_sve.td | 2 +- .../acle_sve2p1_bfadd.c | 11 ++-- .../acle_sve2p1_bfmax.c | 11 ++-- .../acle_sve2p1_bfmaxnm.c | 11 ++-- .../acle_sve2p1_bfmin.c | 11 ++-- .../acle_sve2p1_bfminnm.c | 11 ++-- .../acle_sve2p1_bfmla.c | 11 ++-- .../acle_sve2p1_bfmls.c | 11 ++-- .../acle_sve2p1_bfmul.c | 11 ++-- .../acle_sve2p1_bfsub.c | 11 ++-- llvm/lib/Target/AArch64/AArch64.td| 4 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 ++-- llvm/test/MC/AArch64/SVE2p1/bfadd.s | 43 ++-- llvm/test/MC/AArch64/SVE2p1/bfclamp.s | 32 llvm/test/MC/AArch64/SVE2p1/bfmax.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmin.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfminnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmla.s | 44 +--- llvm/test/MC/AArch64/SVE2p1/bfmls.s | 45 +--- llvm/test/MC/AArch64/SVE2p1/bfmul.s | 51 +++ llvm/test/MC/AArch64/SVE2p1/bfsub.s | 43 ++-- 22 files changed, 311 insertions(+), 198 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 98d7028eb28309..e84d6e5e4cc602 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2086,7 +2086,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u">; defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u">; defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u">; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c index 327c4f078872b3..a3026fee3f6d29 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c @@ -1,10 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o -
[llvm] [clang] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
@@ -2066,7 +2066,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { momchil-velikov wrote: Done. Removed comment as it it useless, the corresponding TargetGuard is just few lines above and the nesting structure is not at all complex. https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75596 >From fc5c82e61efef3f1cd2f6606b12c358637a687f5 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 15 Dec 2023 12:18:53 + Subject: [PATCH 1/2] [AArch64] Update target feature requirements of SVE bfloat instructions According to the latest update of the ISA https://developer.arm.com/documentation/ddi0602/2023-09/?lang=en all of the affected instruction encodings now require (FEAT_SVE2 or FEAT_SME2) and FEAT_SVE_B16B16 --- clang/include/clang/Basic/arm_sve.td | 2 +- .../acle_sve2p1_bfadd.c | 11 ++-- .../acle_sve2p1_bfmax.c | 11 ++-- .../acle_sve2p1_bfmaxnm.c | 11 ++-- .../acle_sve2p1_bfmin.c | 11 ++-- .../acle_sve2p1_bfminnm.c | 11 ++-- .../acle_sve2p1_bfmla.c | 11 ++-- .../acle_sve2p1_bfmls.c | 11 ++-- .../acle_sve2p1_bfmul.c | 11 ++-- .../acle_sve2p1_bfsub.c | 11 ++-- llvm/lib/Target/AArch64/AArch64.td| 4 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 ++-- llvm/test/MC/AArch64/SVE2p1/bfadd.s | 43 ++-- llvm/test/MC/AArch64/SVE2p1/bfclamp.s | 32 llvm/test/MC/AArch64/SVE2p1/bfmax.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmin.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfminnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmla.s | 44 +--- llvm/test/MC/AArch64/SVE2p1/bfmls.s | 45 +--- llvm/test/MC/AArch64/SVE2p1/bfmul.s | 51 +++ llvm/test/MC/AArch64/SVE2p1/bfsub.s | 43 ++-- 22 files changed, 311 insertions(+), 198 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 98d7028eb28309..e84d6e5e4cc602 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2086,7 +2086,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u">; defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u">; defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u">; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c index 327c4f078872b3..a3026fee3f6d29 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c @@ -1,10 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o -
[clang] [Clang][SVE2.1] Update names of the `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Update names of the `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Update names of the `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Update names of the `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [Clang][SVE2.1] Add floating-point variants of `svrevd_XX` (PR #75117)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/75117 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75200 >From d97312680eff280210f588ef22416f845d31d2ef Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 12 Dec 2023 15:08:33 + Subject: [PATCH 1/4] [Clang][SVE2.1] Make the part of the name optional for `svewhileXX` builtins with predicate-as-counter The `_s64`/`_u64` part can be omitted now. It's inferred from the argument types. --- clang/include/clang/Basic/arm_sve.td | 18 ++- .../acle_sve2p1_while_pn.c| 136 +- 2 files changed, 80 insertions(+), 74 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a429a3c5fe378a..9f4cf98ea28a07 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1979,17 +1979,15 @@ let TargetGuard = "sve2p1|sme2" in { //FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; -} -let TargetGuard = "sve2p1" in { -def SVWHILEGE_COUNT : SInst<"svwhilege_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELE_COUNT : SInst<"svwhilele_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELT_COUNT : SInst<"svwhilelt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELO_COUNT : SInst<"svwhilelo_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELS_COUNT : SInst<"svwhilels_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELO_COUNT : SInst<"svwhilelo_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELS_COUNT : SInst<"svwhilels_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVLD1B_X2 : MInst<"svld1[_{2}]_x2", "2}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; def SVLD1H_X2 : MInst<"svld1[_{2}]_x2", "2}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c index 3dbb38582b676c..08c1ee949c1116 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c @@ -1,12 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature
[llvm] [clang] [Clang][SVE2.1] Add floating-point variants of `svrevd_XX` (PR #75117)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75117 >From 979b240d2a084eb87db43d3fabfffa8d3351d294 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 11 Dec 2023 23:25:07 + Subject: [PATCH 1/2] [Clang][SVE2.1] Add floating-point variants of `svrevd_XX` --- clang/include/clang/Basic/arm_sve.td | 4 +- .../aarch64-sve2-intrinsics/acle_sve2_revd.c | 193 ++ llvm/lib/Target/AArch64/SMEInstrFormats.td| 6 + .../CodeGen/AArch64/sve2-intrinsics-revd.ll | 41 4 files changed, 242 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a429a3c5fe378a..cbc2af73d6052e 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2077,11 +2077,11 @@ def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aar def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } -let TargetGuard = "sve2p1" in { +let TargetGuard = "sve2p1|sme" in { def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, "aarch64_sve_sclamp", [], []>; def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>; -defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; +defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">; } let TargetGuard = "sve2p1|sme2" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c index 9d912c5d9e2767..fa005c16763c0f 100644 --- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c @@ -388,3 +388,196 @@ svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) { svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) { return SVE_ACLE_FUNC(svrevd, _u64, _x, )(pg, op); } + + +// CHECK-LABEL: @test_svrevd_bf16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT:ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svrevd_bf16_zu10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT:ret [[TMP1]] +// +svbfloat16_t test_svrevd_bf16_z(svbool_t pg, svbfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _bf16, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT:ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f16_zu10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT:ret [[TMP1]] +// +svfloat16_t test_svrevd_f16_z(svbool_t pg, svfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _f16, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f32_z( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT:ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f32_zu10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT:ret [[TMP1]] +// +svfloat32_t test_svrevd_f32_z(svbool_t pg, svfloat32_t op) { + return SVE_ACLE_FUNC(svrevd, _f32, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f64_z( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2f64( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT:ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f64_zu10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call
[clang] [Clang][SVE2.1] Add intrinsics for `WHILEcc` resulting in predicate pair (PR #75107)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/75107 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75200 >From bb881371fb036819a1d6489a9779e2c5ac7e7d3c Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 12 Dec 2023 15:08:33 + Subject: [PATCH 1/4] [Clang][SVE2.1] Make the part of the name optional for `svewhileXX` builtins with predicate-as-counter The `_s64`/`_u64` part can be omitted now. It's inferred from the argument types. --- clang/include/clang/Basic/arm_sve.td | 18 ++- .../acle_sve2p1_while_pn.c| 136 +- 2 files changed, 80 insertions(+), 74 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index aa9b105364a51a..004a381523afcc 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1950,19 +1950,17 @@ let TargetGuard = "sve2p1|sme2" in { //FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; -} -let TargetGuard = "sve2p1" in { def SVFCLAMP : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>; -def SVWHILEGE_COUNT : SInst<"svwhilege_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELE_COUNT : SInst<"svwhilele_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELT_COUNT : SInst<"svwhilelt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELO_COUNT : SInst<"svwhilelo_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELS_COUNT : SInst<"svwhilels_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELO_COUNT : SInst<"svwhilelo_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELS_COUNT : SInst<"svwhilels_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVLD1B_X2 : MInst<"svld1[_{2}]_x2", "2}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; def SVLD1H_X2 : MInst<"svld1[_{2}]_x2", "2}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c index 3dbb38582b676c..08c1ee949c1116 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c @@ -1,12 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s
[clang] [llvm] [Clang][SVE2.1] Add floating-point variants of `svrevd_XX` (PR #75117)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75117 >From e11897d680dbb892aa645a6fc7f63f91fde3bd7c Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 11 Dec 2023 23:25:07 + Subject: [PATCH 1/2] [Clang][SVE2.1] Add floating-point variants of `svrevd_XX` --- clang/include/clang/Basic/arm_sve.td | 4 +- .../aarch64-sve2-intrinsics/acle_sve2_revd.c | 193 ++ llvm/lib/Target/AArch64/SMEInstrFormats.td| 6 + .../CodeGen/AArch64/sve2-intrinsics-revd.ll | 41 4 files changed, 242 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index aa9b105364a51a..91ed0c65cfc005 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2050,13 +2050,13 @@ def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aar def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } -let TargetGuard = "sve2p1" in { +let TargetGuard = "sve2p1|sme" in { def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, "aarch64_sve_sclamp", [], []>; def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>; def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>; -defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; +defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">; } let TargetGuard = "sve2p1|sme2" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c index 9d912c5d9e2767..fa005c16763c0f 100644 --- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c @@ -388,3 +388,196 @@ svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) { svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) { return SVE_ACLE_FUNC(svrevd, _u64, _x, )(pg, op); } + + +// CHECK-LABEL: @test_svrevd_bf16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT:ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svrevd_bf16_zu10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT:ret [[TMP1]] +// +svbfloat16_t test_svrevd_bf16_z(svbool_t pg, svbfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _bf16, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT:ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f16_zu10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT:ret [[TMP1]] +// +svfloat16_t test_svrevd_f16_z(svbool_t pg, svfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _f16, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f32_z( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT:ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f32_zu10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT:ret [[TMP1]] +// +svfloat32_t test_svrevd_f32_z(svbool_t pg, svfloat32_t op) { + return SVE_ACLE_FUNC(svrevd, _f32, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f64_z( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2f64( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT:ret [[TMP1]] +// +//
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)
@@ -1702,6 +1705,62 @@ void SVEEmitter::createSMERangeChecks(raw_ostream ) { OS << "#endif\n\n"; } +void SVEEmitter::createStreamingAttrs(raw_ostream , ACLEKind Kind) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) +createIntrinsic(R, Defs); + + // The mappings must be sorted based on BuiltinID. + llvm::sort(Defs, [](const std::unique_ptr , + const std::unique_ptr ) { +return A->getMangledName() < B->getMangledName(); + }); + + StringRef ExtensionKind; + switch (Kind) { + case ACLEKind::SME: +ExtensionKind = "SME"; +break; + case ACLEKind::SVE: +ExtensionKind = "SVE"; +break; + } + + OS << "#ifdef GET_" << ExtensionKind << "_STREAMING_ATTRS\n"; + + // Ensure these are only emitted once. + std::set Emitted; + llvm::StringMap> StreamingMap; momchil-velikov wrote: I'd suggest not using `std::set` at all, sort it like it's done now and use `DenseSet` https://github.com/llvm/llvm-project/pull/75487 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/75596 According to the latest update of the ISA https://developer.arm.com/documentation/ddi0602/2023-09/?lang=en all of the affected instruction encodings now require (FEAT_SVE2 or FEAT_SME2) and FEAT_SVE_B16B16 >From fa5fbcb55eceb02ea9d516922cfa3a7e23ec8faf Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 15 Dec 2023 12:18:53 + Subject: [PATCH] [AArch64] Update target feature requirements of SVE bfloat instructions According to the latest update of the ISA https://developer.arm.com/documentation/ddi0602/2023-09/?lang=en all of the affected instruction encodings now require (FEAT_SVE2 or FEAT_SME2) and FEAT_SVE_B16B16 --- clang/include/clang/Basic/arm_sve.td | 2 +- .../acle_sve2p1_bfadd.c | 11 ++-- .../acle_sve2p1_bfmax.c | 11 ++-- .../acle_sve2p1_bfmaxnm.c | 11 ++-- .../acle_sve2p1_bfmin.c | 11 ++-- .../acle_sve2p1_bfminnm.c | 11 ++-- .../acle_sve2p1_bfmla.c | 11 ++-- .../acle_sve2p1_bfmls.c | 11 ++-- .../acle_sve2p1_bfmul.c | 11 ++-- .../acle_sve2p1_bfsub.c | 11 ++-- llvm/lib/Target/AArch64/AArch64.td| 4 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 ++-- llvm/test/MC/AArch64/SVE2p1/bfadd.s | 43 ++-- llvm/test/MC/AArch64/SVE2p1/bfclamp.s | 32 llvm/test/MC/AArch64/SVE2p1/bfmax.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmin.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfminnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmla.s | 44 +--- llvm/test/MC/AArch64/SVE2p1/bfmls.s | 45 +--- llvm/test/MC/AArch64/SVE2p1/bfmul.s | 51 +++ llvm/test/MC/AArch64/SVE2p1/bfsub.s | 43 ++-- 22 files changed, 311 insertions(+), 198 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index aa9b105364a51a..b53409a3e1656a 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2066,7 +2066,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u">; defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u">; defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u">; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c index 327c4f078872b3..a3026fee3f6d29 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c @@ -1,10 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
@@ -1,12 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s momchil-velikov wrote: Yes, that's what I approximately did in another patch (not yet uploaded): ``` // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s ``` The first line with `sme2` and an *extra* end-to-end run with `sme2`. I'll update like this the rest of the in-flight PRs. https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
@@ -148,10 +151,10 @@ void test_svpmov_lane(){ svuint64_t zn_u64; svbool_t pn; - svpmov_lane_u8(zn_u8, -1); // expected-error {{argument value -1 is outside the valid range [0, 0]}} - svpmov_lane_u16(zn_u16, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} - svpmov_lane_u32(zn_u32, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} - svpmov_lane_u64(zn_u64, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + svpmov_lane_u8(zn_u8, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 0]}} momchil-velikov wrote: Yes, the test wasn't running. It's fixed in a followup commit. https://github.com/llvm/llvm-project/pull/75200/commits/3406e10bac69bd3d091bca6aa368f646fb4506e8 https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
@@ -1,12 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s momchil-velikov wrote: I meant something else. Each run line tests all the functions, i.e: * first line tests all functions, with sve2p1 attribute, compiled as C * second line tests all functions, with sve2p1 attribute, compiled as C++ * third line tests all functions, short name versions, with sve2p1 attribute, compiled as C * fourth line tests all functions, short name versions, with sve2p1 attribute, compiled as C++ We can surely add four more lines, with sme2 attribute. But what if we have just: * first line tests all functions, with **sme2** attribute, compiled as C * second line tests all functions, with sve2p1 attribute, compiled as C++ * third line tests all functions, short name versions, with sve2p1 attribute, compiled as C * fourth line tests all functions, short name versions, with sve2p1 attribute, compiled as C++ What are we going to lose as test coverage, compared to a variant with eight run lines? Some odd defect where say, combination of C++ and short/overloaded names misses a check for target feature or streaming attribute? But that's *very very likely* not dependent on *this* set of functions, so it's sufficient to have just one function tested with all the combinations and I've already seen such test files. https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
@@ -1,12 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s momchil-velikov wrote: Yes, we do. I'll add it. Now a questions is do we need a copy of all RUN lines (so we have a set with "+sve2p1" and the same set but with "+sme") Since the goal is to test this part ``` let TargetGuard = "sve2p1|sme2 in { ... ``` I would just change a single run line to use "+sme2" and consider that to provide enough test coverage (even though, strictly speaking, someone could move just one line outside that braces). My problem is that running tests already takes unreasonable amount of time, so I'd rather not make it worse. Thoughts? https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Add intrinsics for `WHILEcc` resulting in predicate pair (PR #75107)
@@ -1341,6 +1341,26 @@ def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNon def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile]>; } +let TargetGuard = "sve2p1|sme2" in { + def SVWHILEGE_S64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2">; + def SVWHILEGT_S64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2">; + def SVWHILEHI_S64_X2 : SInst<"svwhilehi_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2">; + def SVWHILEHS_S64_X2 : SInst<"svwhilehs_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs_x2">; + def SVWHILELE_S64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele_x2">; + def SVWHILELO_S64_X2 : SInst<"svwhilelo_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2">; + def SVWHILELS_S64_X2 : SInst<"svwhilels_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2">; + def SVWHILELT_S64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2">; + + def SVWHILEGE_U64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2">; + def SVWHILEGT_U64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2">; + def SVWHILEHI_U64_X2 : SInst<"svwhilehi_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2">; + def SVWHILEHS_U64_X2 : SInst<"svwhilehs_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs_x2">; + def SVWHILELE_U64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilele_x2">; + def SVWHILELO_U64_X2 : SInst<"svwhilelo_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2">; + def SVWHILELS_U64_X2 : SInst<"svwhilels_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2">; + def SVWHILELT_U64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2">; +} momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/75107 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
@@ -1950,19 +1950,17 @@ let TargetGuard = "sve2p1|sme2" in { //FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; -} -let TargetGuard = "sve2p1" in { def SVFCLAMP : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>; -def SVWHILEGE_COUNT : SInst<"svwhilege_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELE_COUNT : SInst<"svwhilele_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELT_COUNT : SInst<"svwhilelt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELO_COUNT : SInst<"svwhilelo_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELS_COUNT : SInst<"svwhilels_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELO_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELS_COUNT : SInst<"svwhilele_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHI_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75200 >From bb881371fb036819a1d6489a9779e2c5ac7e7d3c Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 12 Dec 2023 15:08:33 + Subject: [PATCH 1/3] [Clang][SVE2.1] Make the part of the name optional for `svewhileXX` builtins with predicate-as-counter The `_s64`/`_u64` part can be omitted now. It's inferred from the argument types. --- clang/include/clang/Basic/arm_sve.td | 18 ++- .../acle_sve2p1_while_pn.c| 136 +- 2 files changed, 80 insertions(+), 74 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index aa9b105364a51a..004a381523afcc 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1950,19 +1950,17 @@ let TargetGuard = "sve2p1|sme2" in { //FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; -} -let TargetGuard = "sve2p1" in { def SVFCLAMP : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>; -def SVWHILEGE_COUNT : SInst<"svwhilege_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELE_COUNT : SInst<"svwhilele_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELT_COUNT : SInst<"svwhilelt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELO_COUNT : SInst<"svwhilelo_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELS_COUNT : SInst<"svwhilels_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELO_COUNT : SInst<"svwhilelo_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELS_COUNT : SInst<"svwhilels_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVLD1B_X2 : MInst<"svld1[_{2}]_x2", "2}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; def SVLD1H_X2 : MInst<"svld1[_{2}]_x2", "2}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c index 3dbb38582b676c..08c1ee949c1116 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c @@ -1,12 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits