bryanpkc updated this revision to Diff 492679.
bryanpkc retitled this revision from "[Clang][AArch64] Add SME ldr and str intrinsic" to "[Clang][AArch64][SME] Add intrinsics for ZA array load/store (LDR/STR)".
bryanpkc edited the summary of this revision.
bryanpkc added a comment.
Rebased and cleaned up the patch.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D134678/new/

https://reviews.llvm.org/D134678

Files:
  clang/include/clang/Basic/TargetBuiltins.h
  clang/include/clang/Basic/arm_sme.td
  clang/include/clang/Basic/arm_sve_sme_incl.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
  clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c @@ -0,0 +1,43 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include <arm_sme.h> + + +// CHECK-C-LABEL: @test_svstr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svstr_vnum_zajPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]]) +// CHECK-NEXT: ret void +// +void test_svstr_vnum_za(uint32_t slice_base, void *ptr) { + svstr_vnum_za(slice_base, 0, ptr); +} + +// CHECK-C-LABEL: @test_svstr_vnum_za_1( +// CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_1jPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) { + svstr_vnum_za(slice_base, 15, ptr); +} + +// CHECK-C-LABEL: @test_svstr_vnum_za_2( +// CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_2jPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8 +// CHECK-NEXT: [[TMP0:%.*]] = 
getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) { + svstr_vnum_za(slice_base, 16, ptr); +} Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -0,0 +1,42 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include <arm_sme.h> + +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]]) +// CHECK-NEXT: ret void +// +void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) { + svldr_vnum_za(slice_base, 0, ptr); +} + +// CHECK-C-LABEL: @test_svldr_vnum_za_1( +// CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_1jPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) { + svldr_vnum_za(slice_base, 15, ptr); +} + 
+// CHECK-C-LABEL: @test_svldr_vnum_za_2( +// CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_2jPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) { + svldr_vnum_za(slice_base, 16, ptr); +} Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -4259,6 +4259,9 @@ llvm::Value *EmitSMEZero(SMETypeFlags TypeFlags, llvm::SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); + llvm::Value *EmitSMELdrStr(SMETypeFlags TypeFlags, + llvm::SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -9448,6 +9448,21 @@ return Builder.CreateCall(F, Ops); } +Value *CodeGenFunction::EmitSMELdrStr(SMETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned IntID) { + Function *Vscale = CGM.getIntrinsic(Intrinsic::vscale, Int64Ty); + llvm::Value *VscaleCall = Builder.CreateCall(Vscale, {}, "vscale"); + llvm::Value *MulVL = Builder.CreateMul( + VscaleCall, + Builder.getInt64(16 * cast<llvm::ConstantInt>(Ops[1])->getZExtValue()), + "mulvl"); + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL); + Ops.erase(&Ops[1]); + Function *F = CGM.getIntrinsic(IntID, {}); + return Builder.CreateCall(F, Ops); +} + // Limit the usage of scalable llvm IR generated by the 
ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9906,6 +9921,8 @@ return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isZero()) return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isLoadReg() || TypeFlags.isStoreReg()) + return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic); /// Should not happen return nullptr; Index: clang/include/clang/Basic/arm_sve_sme_incl.td =================================================================== --- clang/include/clang/Basic/arm_sve_sme_incl.td +++ clang/include/clang/Basic/arm_sve_sme_incl.td @@ -217,6 +217,8 @@ def IsPreservesZA : FlagType<0x4000000000>; def IsMove : FlagType<0x8000000000>; def IsZero : FlagType<0x10000000000>; +def IsLoadReg : FlagType<0x20000000000>; +def IsStoreReg : FlagType<0x40000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType<int val> { Index: clang/include/clang/Basic/arm_sme.td =================================================================== --- clang/include/clang/Basic/arm_sme.td +++ clang/include/clang/Basic/arm_sme.td @@ -40,6 +40,8 @@ def SVLD1_VER_VNUM_ZA64 : MInst<"svld1_ver_vnum_za64", "vimiPQl", "l", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1d_vert">; def SVLD1_VER_VNUM_ZA128 : MInst<"svld1_ver_vnum_za128", "vimiPQl", "q", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1q_vert">; +def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmiQ", "", [IsOverloadNone, IsLoadReg, IsStreamingCompatible, IsSharedZA], MemEltTyDefault, "aarch64_sme_ldr">; + //////////////////////////////////////////////////////////////////////////////// // Stores @@ -65,6 +67,8 @@ def SVST1_VER_VNUM_ZA64 : MInst<"svst1_ver_vnum_za64", "vimiP%l", "l", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], 
MemEltTyDefault, "aarch64_sme_st1d_vert">; def SVST1_VER_VNUM_ZA128 : MInst<"svst1_ver_vnum_za128", "vimiP%l", "q", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1q_vert">; +def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vmi%", "", [IsOverloadNone, IsStoreReg, IsStreamingCompatible, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_str">; + //////////////////////////////////////////////////////////////////////////////// // SME - Read horizontal/vertical ZA slices Index: clang/include/clang/Basic/TargetBuiltins.h =================================================================== --- clang/include/clang/Basic/TargetBuiltins.h +++ clang/include/clang/Basic/TargetBuiltins.h @@ -366,6 +366,8 @@ bool isStore() const { return Flags & IsStore; } bool isMove() const { return Flags & IsMove; } bool isZero() const { return Flags & IsZero; } + bool isLoadReg() const { return Flags & IsLoadReg; } + bool isStoreReg() const { return Flags & IsStoreReg; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits