bryanpkc updated this revision to Diff 492679.
bryanpkc retitled this revision from "[Clang][AArch64] Add SME ldr and str 
intrinsic" to "[Clang][AArch64][SME] Add intrinsics for ZA array load/store 
(LDR/STR)".
bryanpkc edited the summary of this revision.
bryanpkc added a comment.

Rebased and cleaned up the patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D134678/new/

https://reviews.llvm.org/D134678

Files:
  clang/include/clang/Basic/TargetBuiltins.h
  clang/include/clang/Basic/arm_sme.td
  clang/include/clang/Basic/arm_sve_sme_incl.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
  clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c

Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -0,0 +1,43 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+
+// CHECK-C-LABEL: @test_svstr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svstr_vnum_zajPv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT:    ret void
+//
+void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
+  svstr_vnum_za(slice_base, 0, ptr); // immediate 0: expected IR hands ptr to llvm.aarch64.sme.str unchanged
+}
+
+// CHECK-C-LABEL: @test_svstr_vnum_za_1(
+// CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_1jPv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
+  svstr_vnum_za(slice_base, 15, ptr); // immediate 15: ptr is advanced by 15 * 16 = 240 bytes times vscale before the store
+}
+
+// CHECK-C-LABEL: @test_svstr_vnum_za_2(
+// CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_2jPv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) {
+  svstr_vnum_za(slice_base, 16, ptr); // immediate 16: offset is 16 * 16 = 256 bytes times vscale, which the expected IR shows as a shift left by 8
+}
Index: clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -0,0 +1,42 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPKv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT:    ret void
+//
+void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
+  svldr_vnum_za(slice_base, 0, ptr); // immediate 0: expected IR hands ptr to llvm.aarch64.sme.ldr unchanged
+}
+
+// CHECK-C-LABEL: @test_svldr_vnum_za_1(
+// CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_1jPKv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
+  svldr_vnum_za(slice_base, 15, ptr); // immediate 15: ptr is advanced by 15 * 16 = 240 bytes times vscale before the load
+}
+
+// CHECK-C-LABEL: @test_svldr_vnum_za_2(
+// CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_2jPKv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[MULVL:%.*]] = shl nuw nsw i64 [[VSCALE]], 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) {
+  svldr_vnum_za(slice_base, 16, ptr); // immediate 16: offset is 16 * 16 = 256 bytes times vscale, which the expected IR shows as a shift left by 8
+}
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -4259,6 +4259,9 @@
   llvm::Value *EmitSMEZero(SMETypeFlags TypeFlags,
                            llvm::SmallVectorImpl<llvm::Value *> &Ops,
                            unsigned IntID);
+  llvm::Value *EmitSMELdrStr(SMETypeFlags TypeFlags,
+                             llvm::SmallVectorImpl<llvm::Value *> &Ops,
+                             unsigned IntID);
   llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -9448,6 +9448,21 @@
   return Builder.CreateCall(F, Ops);
 }
 
+Value *CodeGenFunction::EmitSMELdrStr(SMETypeFlags TypeFlags,
+                                      SmallVectorImpl<Value *> &Ops,
+                                      unsigned IntID) { // Emits a call to intrinsic IntID after folding Ops[1] into the pointer in Ops[2]; TypeFlags is not used here.
+  Function *Vscale = CGM.getIntrinsic(Intrinsic::vscale, Int64Ty); // NOTE(review): confirm llvm.vscale reflects the vector length these ZA accesses should be scaled by
+  llvm::Value *VscaleCall = Builder.CreateCall(Vscale, {}, "vscale");
+  llvm::Value *MulVL = Builder.CreateMul( // byte offset = vscale * (16 * immediate)
+      VscaleCall,
+      Builder.getInt64(16 * cast<llvm::ConstantInt>(Ops[1])->getZExtValue()), // Ops[1] must already be a ConstantInt: cast<> asserts otherwise
+      "mulvl");
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL); // i8 GEP, so MulVL is applied as a byte offset
+  Ops.erase(&Ops[1]); // drop the immediate; only the remaining operands (Ops[0] and the adjusted pointer) are passed on
+  Function *F = CGM.getIntrinsic(IntID, {});
+  return Builder.CreateCall(F, Ops);
+}
+
 // Limit the usage of scalable llvm IR generated by the ACLE by using the
 // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
@@ -9906,6 +9921,8 @@
     return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isZero())
     return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isLoadReg() || TypeFlags.isStoreReg())
+    return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
 
   /// Should not happen
   return nullptr;
Index: clang/include/clang/Basic/arm_sve_sme_incl.td
===================================================================
--- clang/include/clang/Basic/arm_sve_sme_incl.td
+++ clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -217,6 +217,8 @@
 def IsPreservesZA             : FlagType<0x4000000000>;
 def IsMove                    : FlagType<0x8000000000>;
 def IsZero                    : FlagType<0x10000000000>;
+def IsLoadReg                 : FlagType<0x20000000000>;
+def IsStoreReg                : FlagType<0x40000000000>;
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
Index: clang/include/clang/Basic/arm_sme.td
===================================================================
--- clang/include/clang/Basic/arm_sme.td
+++ clang/include/clang/Basic/arm_sme.td
@@ -40,6 +40,8 @@
 def SVLD1_VER_VNUM_ZA64 : MInst<"svld1_ver_vnum_za64", "vimiPQl", "l", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1d_vert">;
 def SVLD1_VER_VNUM_ZA128 : MInst<"svld1_ver_vnum_za128", "vimiPQl", "q", [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, "aarch64_sme_ld1q_vert">;
 
+def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmiQ", "", [IsOverloadNone, IsLoadReg, IsStreamingCompatible, IsSharedZA], MemEltTyDefault, "aarch64_sme_ldr">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Stores
 
@@ -65,6 +67,8 @@
 def SVST1_VER_VNUM_ZA64 : MInst<"svst1_ver_vnum_za64", "vimiP%l", "l", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1d_vert">;
 def SVST1_VER_VNUM_ZA128 : MInst<"svst1_ver_vnum_za128", "vimiP%l", "q", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1q_vert">;
 
+def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vmi%", "", [IsOverloadNone, IsStoreReg, IsStreamingCompatible, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_str">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // SME - Read horizontal/vertical ZA slices
 
Index: clang/include/clang/Basic/TargetBuiltins.h
===================================================================
--- clang/include/clang/Basic/TargetBuiltins.h
+++ clang/include/clang/Basic/TargetBuiltins.h
@@ -366,6 +366,8 @@
     bool isStore() const { return Flags & IsStore; }
     bool isMove() const { return Flags & IsMove; }
     bool isZero() const { return Flags & IsZero; }
+    bool isLoadReg() const { return Flags & IsLoadReg; }
+    bool isStoreReg() const { return Flags & IsStoreReg; }
 
     uint64_t getBits() const { return Flags; }
     bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to