[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-24 Thread Mariusz Sikora via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGea064ee2a3bd: [AMDGPU] Create Subtarget Features for some of 
16 bits atomic fadd instructions (authored by mariusz-sikora-at-amd).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
  llvm/lib/Target/AMDGPU/AMDGPU.td
  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
  llvm/lib/Target/AMDGPU/BUFInstructions.td
  llvm/lib/Target/AMDGPU/DSInstructions.td
  llvm/lib/Target/AMDGPU/FLATInstructions.td
  llvm/lib/Target/AMDGPU/GCNSubtarget.h

Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -152,9 +152,13 @@
   bool HasMAIInsts = false;
   bool HasFP8Insts = false;
   bool HasPkFmacF16Inst = false;
+  bool HasAtomicDsPkAdd16Insts = false;
+  bool HasAtomicFlatPkAdd16Insts = false;
   bool HasAtomicFaddRtnInsts = false;
   bool HasAtomicFaddNoRtnInsts = false;
-  bool HasAtomicPkFaddNoRtnInsts = false;
+  bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
+  bool HasAtomicBufferGlobalPkAddF16Insts = false;
+  bool HasAtomicGlobalPkAddBF16Inst = false;
   bool HasFlatAtomicFaddF32Inst = false;
   bool SupportsSRAMECC = false;
 
@@ -758,6 +762,10 @@
 return HasPkFmacF16Inst;
   }
 
+  bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; }
+
+  bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; }
+
   bool hasAtomicFaddInsts() const {
 return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
   }
@@ -766,7 +774,17 @@
 
   bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
 
-  bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
+  bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const {
+return HasAtomicBufferGlobalPkAddF16NoRtnInsts;
+  }
+
+  bool hasAtomicBufferGlobalPkAddF16Insts() const {
+return HasAtomicBufferGlobalPkAddF16Insts;
+  }
+
+  bool hasAtomicGlobalPkAddBF16Inst() const {
+return HasAtomicGlobalPkAddBF16Inst;
+  }
 
   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
 
Index: llvm/lib/Target/AMDGPU/FLATInstructions.td
===
--- llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -730,11 +730,13 @@
   defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
 } // End SubtargetPredicate = isGFX90APlus
 
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
   defm FLAT_ATOMIC_PK_ADD_F16: FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16",  VGPR_32, v2f16>;
   defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>;
+} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts
+
+let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in
   defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>;
-} // End SubtargetPredicate = isGFX940Plus
 
 // GFX7-, GFX10-, GFX11-only flat instructions.
 let SubtargetPredicate = isGFX7GFX10GFX11 in {
@@ -938,7 +940,7 @@
   defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
 "global_atomic_add_f32", VGPR_32, f32
   >;
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
   defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
 "global_atomic_pk_add_f16", VGPR_32, v2f16
   >;
@@ -946,7 +948,7 @@
   defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
 "global_atomic_add_f32", VGPR_32, f32
   >;
-let OtherPredicates = [isGFX90APlus] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
   defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
 "global_atomic_pk_add_f16", VGPR_32, v2f16
   >;
@@ -1505,7 +1507,7 @@
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
 }
 
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in {
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", 

[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-24 Thread Jay Foad via Phabricator via cfe-commits
foad accepted this revision.
foad added a comment.
This revision is now accepted and ready to land.

LGTM, thanks!

If you want to remove some of the other unnecessary predicates from Real 
instructions you could do that in a separate NFC patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Mariusz Sikora via Phabricator via cfe-commits
mariusz-sikora-at-amd updated this revision to Diff 507871.
mariusz-sikora-at-amd added a comment.

Put back Real Instructions under SubtargetPredicate associated with gfx 
generation.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
  llvm/lib/Target/AMDGPU/AMDGPU.td
  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
  llvm/lib/Target/AMDGPU/BUFInstructions.td
  llvm/lib/Target/AMDGPU/DSInstructions.td
  llvm/lib/Target/AMDGPU/FLATInstructions.td
  llvm/lib/Target/AMDGPU/GCNSubtarget.h

Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -152,9 +152,13 @@
   bool HasMAIInsts = false;
   bool HasFP8Insts = false;
   bool HasPkFmacF16Inst = false;
+  bool HasAtomicDsPkAdd16Insts = false;
+  bool HasAtomicFlatPkAdd16Insts = false;
   bool HasAtomicFaddRtnInsts = false;
   bool HasAtomicFaddNoRtnInsts = false;
-  bool HasAtomicPkFaddNoRtnInsts = false;
+  bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
+  bool HasAtomicBufferGlobalPkAddF16Insts = false;
+  bool HasAtomicGlobalPkAddBF16Inst = false;
   bool HasFlatAtomicFaddF32Inst = false;
   bool SupportsSRAMECC = false;
 
@@ -758,6 +762,10 @@
 return HasPkFmacF16Inst;
   }
 
+  bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; }
+
+  bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; }
+
   bool hasAtomicFaddInsts() const {
 return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
   }
@@ -766,7 +774,17 @@
 
   bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
 
-  bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
+  bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const {
+return HasAtomicBufferGlobalPkAddF16NoRtnInsts;
+  }
+
+  bool hasAtomicBufferGlobalPkAddF16Insts() const {
+return HasAtomicBufferGlobalPkAddF16Insts;
+  }
+
+  bool hasAtomicGlobalPkAddBF16Inst() const {
+return HasAtomicGlobalPkAddBF16Inst;
+  }
 
   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
 
Index: llvm/lib/Target/AMDGPU/FLATInstructions.td
===
--- llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -730,11 +730,13 @@
   defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
 } // End SubtargetPredicate = isGFX90APlus
 
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
   defm FLAT_ATOMIC_PK_ADD_F16: FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16",  VGPR_32, v2f16>;
   defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>;
+} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts
+
+let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in
   defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>;
-} // End SubtargetPredicate = isGFX940Plus
 
 // GFX7-, GFX10-, GFX11-only flat instructions.
 let SubtargetPredicate = isGFX7GFX10GFX11 in {
@@ -938,7 +940,7 @@
   defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
 "global_atomic_add_f32", VGPR_32, f32
   >;
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
   defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
 "global_atomic_pk_add_f16", VGPR_32, v2f16
   >;
@@ -946,7 +948,7 @@
   defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
 "global_atomic_add_f32", VGPR_32, f32
   >;
-let OtherPredicates = [isGFX90APlus] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
   defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
 "global_atomic_pk_add_f16", VGPR_32, v2f16
   >;
@@ -1505,7 +1507,7 @@
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
 }
 
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in {
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
 }
@@ -1516,14 +1518,17 @@
 defm : GlobalFLATAtomicPatsRtnWithAddrSpace 

[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Mariusz Sikora via Phabricator via cfe-commits
mariusz-sikora-at-amd added inline comments.



Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1915
 
+defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
+defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;

foad wrote:
> mariusz-sikora-at-amd wrote:
> > foad wrote:
> > > Are these changes (from here to the end of the file) still required?
> > Not sure if I understand what you mean. Could you please elaborate more ? 
> > Are you referring to the fact that both flat_atomic and global_atomic have 
> > FLAT encoding and could be unified ?
> > I thought this is required, but now you got me thinking ...
> I don't understand why the changes from here to the end of the file are 
> required. It looks like you have just moved some definitions around, so that 
> they no longer have a SubtargetPredicate applied. Is that correct? Why?
Yes, I moved them out from under the SubtargetPredicates. All tests are 
passing. Even 'negative' for unsupported generations, but after deeper looking 
into 'gen-instr-info' from TableGen I see that there is a difference for 
GLOBAL_ATOMIC_PK_ADD_F16_*_vi definitions of Real Instructions.

1) I'm still convinced that SubtargetPredicate = isGFX940Plus is not needed for 
definitions which inherit from FLAT_Global_Real_Atomics_gfx940. Multiclass sets 
AssemblerPredicate = isGFX940Plus and when looking how Predicates list is 
created we can see that AssemblerPredicate is used in creation of final list of 
Predicates.

  list Predicates = PredConcat<
 PredConcat.ret,
AssemblerPredicate>.ret,
 WaveSizePredicate>.ret;


2) In case of FLAT_Global_Real_Atomics_vi I see that SubtargetPredicate = 
isGFX8GFX9NotGFX940 will be required, because AssemblerPredicate is isGFX8GFX9;
I wonder why everything works event when not having these SubtargetPredicates = 
isGFX8GFX9NotGFX940


I will revert these changes and move definitions back under the 
SubtargetPredicates. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Jay Foad via Phabricator via cfe-commits
foad added inline comments.



Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2891
 
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
+defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>;

mariusz-sikora-at-amd wrote:
> foad wrote:
> > Is this still required?
> No. We can remove this. But I wanted to limit this change only to atomic 
> f16/bf16 and not going deeper 
OK. I thought this was something you added in this patch, but now I see it is 
just moved around.



Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1915
 
+defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
+defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;

mariusz-sikora-at-amd wrote:
> foad wrote:
> > Are these changes (from here to the end of the file) still required?
> Not sure if I understand what you mean. Could you please elaborate more ? Are 
> you referring to the fact that both flat_atomic and global_atomic have FLAT 
> encoding and could be unified ?
> I thought this is required, but now you got me thinking ...
I don't understand why the changes from here to the end of the file are 
required. It looks like you have just moved some definitions around, so that 
they no longer have a SubtargetPredicate applied. Is that correct? Why?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Mariusz Sikora via Phabricator via cfe-commits
mariusz-sikora-at-amd added inline comments.



Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2891
 
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
+defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>;

foad wrote:
> Is this still required?
No. We can remove this. But I wanted to limit this change only to atomic 
f16/bf16 and not going deeper 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Mariusz Sikora via Phabricator via cfe-commits
mariusz-sikora-at-amd added inline comments.



Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1915
 
+defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
+defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;

foad wrote:
> Are these changes (from here to the end of the file) still required?
Not sure if I understand what you mean. Could you please elaborate more ? Are 
you referring to the fact that both flat_atomic and global_atomic have FLAT 
encoding and could be unified ?
I thought this is required, but now you got me thinking ...


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Jay Foad via Phabricator via cfe-commits
foad added inline comments.



Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2891
 
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
+defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>;

Is this still required?



Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1915
 
+defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
+defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;

Are these changes (from here to the end of the file) still required?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Mariusz Sikora via Phabricator via cfe-commits
mariusz-sikora-at-amd updated this revision to Diff 507722.
mariusz-sikora-at-amd added a comment.

Changes after review.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
  llvm/lib/Target/AMDGPU/AMDGPU.td
  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
  llvm/lib/Target/AMDGPU/BUFInstructions.td
  llvm/lib/Target/AMDGPU/DSInstructions.td
  llvm/lib/Target/AMDGPU/FLATInstructions.td
  llvm/lib/Target/AMDGPU/GCNSubtarget.h

Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -152,9 +152,13 @@
   bool HasMAIInsts = false;
   bool HasFP8Insts = false;
   bool HasPkFmacF16Inst = false;
+  bool HasAtomicDsPkAdd16Insts = false;
+  bool HasAtomicFlatPkAdd16Insts = false;
   bool HasAtomicFaddRtnInsts = false;
   bool HasAtomicFaddNoRtnInsts = false;
-  bool HasAtomicPkFaddNoRtnInsts = false;
+  bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
+  bool HasAtomicBufferGlobalPkAddF16Insts = false;
+  bool HasAtomicGlobalPkAddBF16Inst = false;
   bool HasFlatAtomicFaddF32Inst = false;
   bool SupportsSRAMECC = false;
 
@@ -758,6 +762,10 @@
 return HasPkFmacF16Inst;
   }
 
+  bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; }
+
+  bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; }
+
   bool hasAtomicFaddInsts() const {
 return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
   }
@@ -766,7 +774,17 @@
 
   bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
 
-  bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
+  bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const {
+return HasAtomicBufferGlobalPkAddF16NoRtnInsts;
+  }
+
+  bool hasAtomicBufferGlobalPkAddF16Insts() const {
+return HasAtomicBufferGlobalPkAddF16Insts;
+  }
+
+  bool hasAtomicGlobalPkAddBF16Inst() const {
+return HasAtomicGlobalPkAddBF16Inst;
+  }
 
   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
 
Index: llvm/lib/Target/AMDGPU/FLATInstructions.td
===
--- llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -730,11 +730,14 @@
   defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
 } // End SubtargetPredicate = isGFX90APlus
 
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
   defm FLAT_ATOMIC_PK_ADD_F16: FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16",  VGPR_32, v2f16>;
   defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>;
+} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts
+
+let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in {
   defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>;
-} // End SubtargetPredicate = isGFX940Plus
+} // End SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst
 
 // GFX7-, GFX10-, GFX11-only flat instructions.
 let SubtargetPredicate = isGFX7GFX10GFX11 in {
@@ -938,7 +941,7 @@
   defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
 "global_atomic_add_f32", VGPR_32, f32
   >;
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
   defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
 "global_atomic_pk_add_f16", VGPR_32, v2f16
   >;
@@ -946,7 +949,7 @@
   defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
 "global_atomic_add_f32", VGPR_32, f32
   >;
-let OtherPredicates = [isGFX90APlus] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
   defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
 "global_atomic_pk_add_f16", VGPR_32, v2f16
   >;
@@ -1505,7 +1508,7 @@
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
 }
 
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in {
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
 }
@@ -1516,14 +1519,17 @@
 defm : GlobalFLATAtomicPatsRtnWithAddrSpace 

[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Jay Foad via Phabricator via cfe-commits
foad added inline comments.



Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2889
 
-defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>;
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in {
 defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;

mariusz-sikora-at-amd wrote:
> foad wrote:
> > Could remove the braces if you prefer - then you don't need the "End" 
> > comment either.
> So, as I understand from other comment:
> 
> > Generally Real instructions copy their predicates from the corresponding 
> > Pseudo, so this should not be required here. Please check the other places 
> > where you have added predicates to Real instructions too.
> 
> We do not need this (L2889) Predicate, because it was added to Pseudo 
> Instruction ?
Correct. See the places commented "copy relevant pseudo op flags" in this file.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Mariusz Sikora via Phabricator via cfe-commits
mariusz-sikora-at-amd added inline comments.



Comment at: clang/include/clang/Basic/BuiltinsAMDGPU.def:233
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", 
"atomic-global-pk-add-bf16-inst")
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", 
"atomic-ds-pk-add-16-insts")
 

foad wrote:
> So __builtin_amdgcn_ds_atomic_fadd_v2f16 is missing here? (Just curious- I am 
> not asking you to add it in this patch.)
I have a different change which will add only 
__builtin_amdgcn_ds_atomic_fadd_v2f16 (not pushed yet, because it depends on 
this change).



Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2889
 
-defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>;
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in {
 defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;

foad wrote:
> Could remove the braces if you prefer - then you don't need the "End" comment 
> either.
So, as I understand from other comment:

> Generally Real instructions copy their predicates from the corresponding 
> Pseudo, so this should not be required here. Please check the other places 
> where you have added predicates to Real instructions too.

We do not need this (L2889) Predicate, because it was added to Pseudo 
Instruction ?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Jay Foad via Phabricator via cfe-commits
foad added inline comments.



Comment at: clang/include/clang/Basic/BuiltinsAMDGPU.def:233
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", 
"atomic-global-pk-add-bf16-inst")
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", 
"atomic-ds-pk-add-16-insts")
 

So __builtin_amdgcn_ds_atomic_fadd_v2f16 is missing here? (Just curious- I am 
not asking you to add it in this patch.)



Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2889
 
-defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>;
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in {
 defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;

Could remove the braces if you prefer - then you don't need the "End" comment 
either.



Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1916
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in
+defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
+

Generally Real instructions copy their predicates from the corresponding 
Pseudo, so this should not be required here. Please check the other places 
where you have added predicates to Real instructions too.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146701/new/

https://reviews.llvm.org/D146701

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions

2023-03-23 Thread Mariusz Sikora via Phabricator via cfe-commits
mariusz-sikora-at-amd created this revision.
Herald added subscribers: kosarev, foad, kerbowa, hiraditya, tpr, dstuttard, 
yaxunl, jvesely, kzhuravl, arsenm.
Herald added a project: All.
mariusz-sikora-at-amd requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, wdng.
Herald added projects: clang, LLVM.

Introducing Subtarget Features for instructions:

- ds_pk_add_bf16
- ds_pk_add_f16
- ds_pk_add_rtn_bf16
- ds_pk_add_rtn_f16
- flat_atomic_pk_add_f16
- flat_atomic_pk_add_bf16
- global_atomic_pk_add_f16
- global_atomic_pk_add_bf16
- buffer_atomic_pk_add_f16


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D146701

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
  llvm/lib/Target/AMDGPU/AMDGPU.td
  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
  llvm/lib/Target/AMDGPU/BUFInstructions.td
  llvm/lib/Target/AMDGPU/DSInstructions.td
  llvm/lib/Target/AMDGPU/FLATInstructions.td
  llvm/lib/Target/AMDGPU/GCNSubtarget.h

Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -152,9 +152,13 @@
   bool HasMAIInsts = false;
   bool HasFP8Insts = false;
   bool HasPkFmacF16Inst = false;
+  bool HasAtomicDsPkAdd16Insts = false;
+  bool HasAtomicFlatPkAdd16Insts = false;
   bool HasAtomicFaddRtnInsts = false;
   bool HasAtomicFaddNoRtnInsts = false;
-  bool HasAtomicPkFaddNoRtnInsts = false;
+  bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
+  bool HasAtomicBufferGlobalPkAddF16Insts = false;
+  bool HasAtomicGlobalPkAddBF16Inst = false;
   bool HasFlatAtomicFaddF32Inst = false;
   bool SupportsSRAMECC = false;
 
@@ -758,6 +762,10 @@
 return HasPkFmacF16Inst;
   }
 
+  bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; }
+
+  bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; }
+
   bool hasAtomicFaddInsts() const {
 return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
   }
@@ -766,7 +774,17 @@
 
   bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
 
-  bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
+  bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const {
+return HasAtomicBufferGlobalPkAddF16NoRtnInsts;
+  }
+
+  bool hasAtomicBufferGlobalPkAddF16Insts() const {
+return HasAtomicBufferGlobalPkAddF16Insts;
+  }
+
+  bool hasAtomicGlobalPkAddBF16Inst() const {
+return HasAtomicGlobalPkAddBF16Inst;
+  }
 
   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
 
Index: llvm/lib/Target/AMDGPU/FLATInstructions.td
===
--- llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -730,11 +730,14 @@
   defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
 } // End SubtargetPredicate = isGFX90APlus
 
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
   defm FLAT_ATOMIC_PK_ADD_F16: FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16",  VGPR_32, v2f16>;
   defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>;
+} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts
+
+let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in {
   defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>;
-} // End SubtargetPredicate = isGFX940Plus
+} // End SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst
 
 // GFX7-, GFX10-, GFX11-only flat instructions.
 let SubtargetPredicate = isGFX7GFX10GFX11 in {
@@ -938,7 +941,7 @@
   defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
 "global_atomic_add_f32", VGPR_32, f32
   >;
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
   defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
 "global_atomic_pk_add_f16", VGPR_32, v2f16
   >;
@@ -946,7 +949,7 @@
   defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
 "global_atomic_add_f32", VGPR_32, f32
   >;
-let OtherPredicates = [isGFX90APlus] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
   defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
 "global_atomic_pk_add_f16", VGPR_32, v2f16
   >;
@@ -1505,7 +1508,7 @@
 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
 }
 
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in {
+let