This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGfc28f600e558: [AMDGPU] Restore the s_memtime instruction in gfx1030 (authored by foad).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97928/new/

https://reviews.llvm.org/D97928

Files:
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl
  llvm/lib/Target/AMDGPU/AMDGPU.td
  llvm/lib/Target/AMDGPU/GCNSubtarget.h
  llvm/lib/Target/AMDGPU/SMInstructions.td
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
  llvm/test/MC/AMDGPU/gfx1030_err.s

Index: llvm/test/MC/AMDGPU/gfx1030_err.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx1030_err.s
+++ llvm/test/MC/AMDGPU/gfx1030_err.s
@@ -21,9 +21,6 @@
 s_get_waveid_in_workgroup s0
 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
-s_memtime s[0:1]
-// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
-
 s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK)
 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: specified hardware register is not supported on this GPU
 
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefixes=SIVI,GCN %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=SIVI,GCN %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: not --crash llc -march=amdgcn -mcpu=gfx1030 -mattr=-flat-for-global -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GFX1030-ERR %s
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 declare i64 @llvm.amdgcn.s.memtime() #0
 
@@ -13,7 +13,6 @@
 ; SIVI-NOT: lgkmcnt
 ; GCN: s_memtime s{{\[[0-9]+:[0-9]+\]}}
 ; GCN: {{buffer|global}}_store_dwordx2
-; GFX1030-ERR: ERROR
 define amdgpu_kernel void @test_s_memtime(i64 addrspace(1)* %out) #0 {
   %cycle0 = call i64 @llvm.amdgcn.s.memtime()
   store volatile i64 %cycle0, i64 addrspace(1)* %out
Index: llvm/lib/Target/AMDGPU/SMInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SMInstructions.td
+++ llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -866,14 +866,16 @@
 >;
 } // let OtherPredicates = [HasSMemTimeInst]
 
-let OtherPredicates = [HasNoSMemTimeInst] in {
+let OtherPredicates = [HasShaderCyclesRegister] in {
 def : GCNPat <
   (i64 (readcyclecounter)),
   (REG_SEQUENCE SReg_64,
     (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
-    (S_MOV_B32 (i32 0)), sub1)
->;
-} // let OtherPredicates = [HasNoSMemTimeInst]
+    (S_MOV_B32 (i32 0)), sub1)> {
+  // Prefer this to s_memtime because it has lower and more predictable latency.
+  let AddedComplexity = 1;
+}
+} // let OtherPredicates = [HasShaderCyclesRegister]
 
 //===----------------------------------------------------------------------===//
 // GFX10.
Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -163,6 +163,7 @@
   bool HasVscnt;
   bool HasGetWaveIdInst;
   bool HasSMemTimeInst;
+  bool HasShaderCyclesRegister;
   bool HasRegisterBanking;
   bool HasVOP3Literal;
   bool HasNoDataDepHazard;
@@ -714,6 +715,10 @@
     return HasSMemTimeInst;
   }
 
+  bool hasShaderCyclesRegister() const {
+    return HasShaderCyclesRegister;
+  }
+
   bool hasRegisterBanking() const {
     return HasRegisterBanking;
   }
Index: llvm/lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPU.td
+++ llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -563,6 +563,12 @@
   "Has s_memtime instruction"
 >;
 
+def FeatureShaderCyclesRegister : SubtargetFeature<"shader-cycles-register",
+  "HasShaderCyclesRegister",
+  "true",
+  "Has SHADER_CYCLES hardware register"
+>;
+
 def FeatureMadMacF32Insts : SubtargetFeature<"mad-mac-f32-insts",
   "HasMadMacF32Insts",
   "true",
@@ -777,7 +783,7 @@
    FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
    FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
    FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
-   FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
+   FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
    FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
   ]
 >;
@@ -988,7 +994,6 @@
      FeatureScalarAtomics,
      FeatureScalarFlatScratchInsts,
      FeatureGetWaveIdInst,
-     FeatureSMemTimeInst,
      FeatureMadMacF32Insts,
      FeatureDsSrc2Insts,
      FeatureLdsMisalignedBug,
@@ -1009,7 +1014,6 @@
      FeatureScalarAtomics,
      FeatureScalarFlatScratchInsts,
      FeatureGetWaveIdInst,
-     FeatureSMemTimeInst,
      FeatureMadMacF32Insts,
      FeatureDsSrc2Insts,
      FeatureLdsMisalignedBug,
@@ -1030,7 +1034,6 @@
      FeatureScalarAtomics,
      FeatureScalarFlatScratchInsts,
      FeatureGetWaveIdInst,
-     FeatureSMemTimeInst,
      FeatureMadMacF32Insts,
      FeatureDsSrc2Insts,
      FeatureLdsMisalignedBug,
@@ -1047,7 +1050,8 @@
    FeatureDot5Insts,
    FeatureDot6Insts,
    FeatureNSAEncoding,
-   FeatureWavefrontSize32]>;
+   FeatureWavefrontSize32,
+   FeatureShaderCyclesRegister]>;
 
 //===----------------------------------------------------------------------===//
 
@@ -1377,7 +1381,8 @@
 def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">,
   AssemblerPredicate<(all_of FeatureSMemTimeInst)>;
 
-def HasNoSMemTimeInst : Predicate<"!Subtarget->hasSMemTimeInst()">;
+def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">,
+  AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>;
 
 def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
   AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;
Index: clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl
===================================================================
--- clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl
+++ /dev/null
@@ -1,7 +0,0 @@
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1030 -verify -S -o - %s
-
-void test_gfx1030_s_memtime()
-{
-  __builtin_amdgcn_s_memtime(); // expected-error {{'__builtin_amdgcn_s_memtime' needs target feature s-memtime-inst}}
-}
Index: clang/test/CodeGenOpenCL/amdgpu-features.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -58,9 +58,9 @@
 // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
 // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
 // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
-// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
-// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
-// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
+// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
+// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
+// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
 
 kernel void test() {}
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -192,6 +192,7 @@
       Features["gfx10-insts"] = true;
       Features["gfx10-3-insts"] = true;
       Features["s-memrealtime"] = true;
+      Features["s-memtime-inst"] = true;
       break;
     case GK_GFX1012:
     case GK_GFX1011:
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to