[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rGea064ee2a3bd: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions (authored by mariusz-sikora-at-amd). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 Files: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/lib/Basic/Targets/AMDGPU.cpp clang/test/CodeGenOpenCL/amdgpu-features.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl llvm/lib/Target/AMDGPU/AMDGPU.td llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp llvm/lib/Target/AMDGPU/BUFInstructions.td llvm/lib/Target/AMDGPU/DSInstructions.td llvm/lib/Target/AMDGPU/FLATInstructions.td llvm/lib/Target/AMDGPU/GCNSubtarget.h Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h === --- llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -152,9 +152,13 @@ bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasPkFmacF16Inst = false; + bool HasAtomicDsPkAdd16Insts = false; + bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; - bool HasAtomicPkFaddNoRtnInsts = false; + bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; + bool HasAtomicBufferGlobalPkAddF16Insts = false; + bool HasAtomicGlobalPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; bool SupportsSRAMECC = false; @@ -758,6 +762,10 @@ return HasPkFmacF16Inst; } + bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; } + + bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; } + bool hasAtomicFaddInsts() const { return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; } @@ -766,7 +774,17 @@ bool hasAtomicFaddNoRtnInsts() const { 
return HasAtomicFaddNoRtnInsts; } - bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; } + bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const { +return HasAtomicBufferGlobalPkAddF16NoRtnInsts; + } + + bool hasAtomicBufferGlobalPkAddF16Insts() const { +return HasAtomicBufferGlobalPkAddF16Insts; + } + + bool hasAtomicGlobalPkAddBF16Inst() const { +return HasAtomicGlobalPkAddBF16Inst; + } bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } Index: llvm/lib/Target/AMDGPU/FLATInstructions.td === --- llvm/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -730,11 +730,13 @@ defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>; } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = isGFX940Plus in { +let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in { defm FLAT_ATOMIC_PK_ADD_F16: FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>; +} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts + +let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>; -} // End SubtargetPredicate = isGFX940Plus // GFX7-, GFX10-, GFX11-only flat instructions. 
let SubtargetPredicate = isGFX7GFX10GFX11 in { @@ -938,7 +940,7 @@ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_add_f32", VGPR_32, f32 >; -let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; @@ -946,7 +948,7 @@ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_add_f32", VGPR_32, f32 >; -let OtherPredicates = [isGFX90APlus] in +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; @@ -1505,7 +1507,7 @@ defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; } -let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in { +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in { defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addr
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
foad accepted this revision. foad added a comment. This revision is now accepted and ready to land. LGTM, thanks! If you want to remove some of the other unnecessary predicates from Real instructions you could do that in a separate NFC patch. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
mariusz-sikora-at-amd updated this revision to Diff 507871. mariusz-sikora-at-amd added a comment. Put back Real Instructions under SubtargetPredicate associated with gfx generation. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 Files: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/lib/Basic/Targets/AMDGPU.cpp clang/test/CodeGenOpenCL/amdgpu-features.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl llvm/lib/Target/AMDGPU/AMDGPU.td llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp llvm/lib/Target/AMDGPU/BUFInstructions.td llvm/lib/Target/AMDGPU/DSInstructions.td llvm/lib/Target/AMDGPU/FLATInstructions.td llvm/lib/Target/AMDGPU/GCNSubtarget.h Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h === --- llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -152,9 +152,13 @@ bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasPkFmacF16Inst = false; + bool HasAtomicDsPkAdd16Insts = false; + bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; - bool HasAtomicPkFaddNoRtnInsts = false; + bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; + bool HasAtomicBufferGlobalPkAddF16Insts = false; + bool HasAtomicGlobalPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; bool SupportsSRAMECC = false; @@ -758,6 +762,10 @@ return HasPkFmacF16Inst; } + bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; } + + bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; } + bool hasAtomicFaddInsts() const { return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; } @@ -766,7 +774,17 @@ bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; } - bool hasAtomicPkFaddNoRtnInsts() const { return 
HasAtomicPkFaddNoRtnInsts; } + bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const { +return HasAtomicBufferGlobalPkAddF16NoRtnInsts; + } + + bool hasAtomicBufferGlobalPkAddF16Insts() const { +return HasAtomicBufferGlobalPkAddF16Insts; + } + + bool hasAtomicGlobalPkAddBF16Inst() const { +return HasAtomicGlobalPkAddBF16Inst; + } bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } Index: llvm/lib/Target/AMDGPU/FLATInstructions.td === --- llvm/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -730,11 +730,13 @@ defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>; } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = isGFX940Plus in { +let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in { defm FLAT_ATOMIC_PK_ADD_F16: FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>; +} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts + +let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>; -} // End SubtargetPredicate = isGFX940Plus // GFX7-, GFX10-, GFX11-only flat instructions. 
let SubtargetPredicate = isGFX7GFX10GFX11 in { @@ -938,7 +940,7 @@ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_add_f32", VGPR_32, f32 >; -let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; @@ -946,7 +948,7 @@ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_add_f32", VGPR_32, f32 >; -let OtherPredicates = [isGFX90APlus] in +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; @@ -1505,7 +1507,7 @@ defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; } -let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in { +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in { defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; } @@ -1516,14 +1518,17 @@ defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBA
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
mariusz-sikora-at-amd added inline comments. Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1915 +defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; +defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; foad wrote: > mariusz-sikora-at-amd wrote: > > foad wrote: > > > Are these changes (from here to the end of the file) still required? > > Not sure if I understand what you mean. Could you please elaborate more ? > > Are you referring to the fact that both flat_atomic and global_atomic have > > FLAT encoding and could be unified ? > > I thought this is required, but now you got me thinking ... > I don't understand why the changes from here to the end of the file are > required. It looks like you have just moved some definitions around, so that > they no longer have a SubtargetPredicate applied. Is that correct? Why? Yes, I moved them out from under the SubtargetPredicates. All tests are passing, even the 'negative' ones for unsupported generations, but after looking deeper into the 'gen-instr-info' output from TableGen I see that there is a difference for the GLOBAL_ATOMIC_PK_ADD_F16_*_vi definitions of Real Instructions. 1) I'm still convinced that SubtargetPredicate = isGFX940Plus is not needed for definitions which inherit from FLAT_Global_Real_Atomics_gfx940. The multiclass sets AssemblerPredicate = isGFX940Plus, and when looking at how the Predicates list is created we can see that AssemblerPredicate is used in the creation of the final list of Predicates: list<Predicate> Predicates = PredConcat<PredConcat<PredConcat<OtherPredicates, SubtargetPredicate>.ret, AssemblerPredicate>.ret, WaveSizePredicate>.ret; 2) In the case of FLAT_Global_Real_Atomics_vi I see that SubtargetPredicate = isGFX8GFX9NotGFX940 will be required, because AssemblerPredicate is isGFX8GFX9; I wonder why everything works even when not having these SubtargetPredicates = isGFX8GFX9NotGFX940. I will revert these changes and move the definitions back under the SubtargetPredicates. 
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
foad added inline comments. Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2891 +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in { +defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>; mariusz-sikora-at-amd wrote: > foad wrote: > > Is this still required? > No. We can remove this. But I wanted to limit this change only to atomic > f16/bf16 and not going deeper OK. I thought this was something you added in this patch, but now I see it is just moved around. Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1915 +defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; +defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; mariusz-sikora-at-amd wrote: > foad wrote: > > Are these changes (from here to the end of the file) still required? > Not sure if I understand what you mean. Could you please elaborate more ? Are > you referring to the fact that both flat_atomic and global_atomic have FLAT > encoding and could be unified ? > I thought this is required, but now you got me thinking ... I don't understand why the changes from here to the end of the file are required. It looks like you have just moved some definitions around, so that they no longer have a SubtargetPredicate applied. Is that correct? Why? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
mariusz-sikora-at-amd added inline comments. Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2891 +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in { +defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>; foad wrote: > Is this still required? No. We can remove this. But I wanted to limit this change to atomic f16/bf16 only and not go any deeper. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
mariusz-sikora-at-amd added inline comments. Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1915 +defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; +defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; foad wrote: > Are these changes (from here to the end of the file) still required? Not sure if I understand what you mean. Could you please elaborate? Are you referring to the fact that both flat_atomic and global_atomic have FLAT encoding and could be unified? I thought this was required, but now you got me thinking ... Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
foad added inline comments. Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2891 +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in { +defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>; Is this still required? Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1915 +defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; +defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; Are these changes (from here to the end of the file) still required? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
mariusz-sikora-at-amd updated this revision to Diff 507722. mariusz-sikora-at-amd added a comment. Changes after review. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 Files: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/lib/Basic/Targets/AMDGPU.cpp clang/test/CodeGenOpenCL/amdgpu-features.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl llvm/lib/Target/AMDGPU/AMDGPU.td llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp llvm/lib/Target/AMDGPU/BUFInstructions.td llvm/lib/Target/AMDGPU/DSInstructions.td llvm/lib/Target/AMDGPU/FLATInstructions.td llvm/lib/Target/AMDGPU/GCNSubtarget.h Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h === --- llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -152,9 +152,13 @@ bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasPkFmacF16Inst = false; + bool HasAtomicDsPkAdd16Insts = false; + bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; - bool HasAtomicPkFaddNoRtnInsts = false; + bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; + bool HasAtomicBufferGlobalPkAddF16Insts = false; + bool HasAtomicGlobalPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; bool SupportsSRAMECC = false; @@ -758,6 +762,10 @@ return HasPkFmacF16Inst; } + bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; } + + bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; } + bool hasAtomicFaddInsts() const { return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; } @@ -766,7 +774,17 @@ bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; } - bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; } + bool 
hasAtomicBufferGlobalPkAddF16NoRtnInsts() const { +return HasAtomicBufferGlobalPkAddF16NoRtnInsts; + } + + bool hasAtomicBufferGlobalPkAddF16Insts() const { +return HasAtomicBufferGlobalPkAddF16Insts; + } + + bool hasAtomicGlobalPkAddBF16Inst() const { +return HasAtomicGlobalPkAddBF16Inst; + } bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } Index: llvm/lib/Target/AMDGPU/FLATInstructions.td === --- llvm/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -730,11 +730,14 @@ defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>; } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = isGFX940Plus in { +let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in { defm FLAT_ATOMIC_PK_ADD_F16: FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>; +} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts + +let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in { defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>; -} // End SubtargetPredicate = isGFX940Plus +} // End SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst // GFX7-, GFX10-, GFX11-only flat instructions. 
let SubtargetPredicate = isGFX7GFX10GFX11 in { @@ -938,7 +941,7 @@ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_add_f32", VGPR_32, f32 >; -let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; @@ -946,7 +949,7 @@ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_add_f32", VGPR_32, f32 >; -let OtherPredicates = [isGFX90APlus] in +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; @@ -1505,7 +1508,7 @@ defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; } -let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in { +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in { defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; } @@ -1516,14 +1519,17 @@ defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
foad added inline comments. Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2889 -defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>; +let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in { defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; mariusz-sikora-at-amd wrote: > foad wrote: > > Could remove the braces if you prefer - then you don't need the "End" > > comment either. > So, as I understand from other comment: > > > Generally Real instructions copy their predicates from the corresponding > > Pseudo, so this should not be required here. Please check the other places > > where you have added predicates to Real instructions too. > > We do not need this (L2889) Predicate, because it was added to Pseudo > Instruction ? Correct. See the places commented "copy relevant pseudo op flags" in this file. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
mariusz-sikora-at-amd added inline comments. Comment at: clang/include/clang/Basic/BuiltinsAMDGPU.def:233 +TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst") +TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts") foad wrote: > So __builtin_amdgcn_ds_atomic_fadd_v2f16 is missing here? (Just curious- I am > not asking you to add it in this patch.) I have a different change which will add only __builtin_amdgcn_ds_atomic_fadd_v2f16 (not pushed yet, because it depends on this change). Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2889 -defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>; +let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in { defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; foad wrote: > Could remove the braces if you prefer - then you don't need the "End" comment > either. So, as I understand from the other comment: > Generally Real instructions copy their predicates from the corresponding > Pseudo, so this should not be required here. Please check the other places > where you have added predicates to Real instructions too. So we do not need this predicate (line 2889), because it was already added to the Pseudo instruction? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
foad added inline comments. Comment at: clang/include/clang/Basic/BuiltinsAMDGPU.def:233 +TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst") +TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts") So __builtin_amdgcn_ds_atomic_fadd_v2f16 is missing here? (Just curious- I am not asking you to add it in this patch.) Comment at: llvm/lib/Target/AMDGPU/BUFInstructions.td:2889 -defm BUFFER_ATOMIC_ADD_F32: MUBUF_Real_Atomic_vi <0x4d>; +let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in { defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; Could remove the braces if you prefer - then you don't need the "End" comment either. Comment at: llvm/lib/Target/AMDGPU/FLATInstructions.td:1916 +let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in +defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; + Generally Real instructions copy their predicates from the corresponding Pseudo, so this should not be required here. Please check the other places where you have added predicates to Real instructions too. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D146701/new/ https://reviews.llvm.org/D146701 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D146701: [AMDGPU] Create Subtarget Features for some of 16 bits atomic fadd instructions
mariusz-sikora-at-amd created this revision. Herald added subscribers: kosarev, foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl, arsenm. Herald added a project: All. mariusz-sikora-at-amd requested review of this revision. Herald added subscribers: llvm-commits, cfe-commits, wdng. Herald added projects: clang, LLVM. Introducing Subtarget Features for instructions: - ds_pk_add_bf16 - ds_pk_add_f16 - ds_pk_add_rtn_bf16 - ds_pk_add_rtn_f16 - flat_atomic_pk_add_f16 - flat_atomic_pk_add_bf16 - global_atomic_pk_add_f16 - global_atomic_pk_add_bf16 - buffer_atomic_pk_add_f16 Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D146701 Files: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/lib/Basic/Targets/AMDGPU.cpp clang/test/CodeGenOpenCL/amdgpu-features.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl llvm/lib/Target/AMDGPU/AMDGPU.td llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp llvm/lib/Target/AMDGPU/BUFInstructions.td llvm/lib/Target/AMDGPU/DSInstructions.td llvm/lib/Target/AMDGPU/FLATInstructions.td llvm/lib/Target/AMDGPU/GCNSubtarget.h Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h === --- llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -152,9 +152,13 @@ bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasPkFmacF16Inst = false; + bool HasAtomicDsPkAdd16Insts = false; + bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; - bool HasAtomicPkFaddNoRtnInsts = false; + bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; + bool HasAtomicBufferGlobalPkAddF16Insts = false; + bool HasAtomicGlobalPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; bool SupportsSRAMECC = false; @@ -758,6 +762,10 @@ return HasPkFmacF16Inst; } + bool hasAtomicDsPkAdd16Insts() const { return 
HasAtomicDsPkAdd16Insts; } + + bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; } + bool hasAtomicFaddInsts() const { return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; } @@ -766,7 +774,17 @@ bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; } - bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; } + bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const { +return HasAtomicBufferGlobalPkAddF16NoRtnInsts; + } + + bool hasAtomicBufferGlobalPkAddF16Insts() const { +return HasAtomicBufferGlobalPkAddF16Insts; + } + + bool hasAtomicGlobalPkAddBF16Inst() const { +return HasAtomicGlobalPkAddBF16Inst; + } bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } Index: llvm/lib/Target/AMDGPU/FLATInstructions.td === --- llvm/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -730,11 +730,14 @@ defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>; } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = isGFX940Plus in { +let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in { defm FLAT_ATOMIC_PK_ADD_F16: FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>; +} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts + +let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in { defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>; -} // End SubtargetPredicate = isGFX940Plus +} // End SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst // GFX7-, GFX10-, GFX11-only flat instructions. 
let SubtargetPredicate = isGFX7GFX10GFX11 in { @@ -938,7 +941,7 @@ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_add_f32", VGPR_32, f32 >; -let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; @@ -946,7 +949,7 @@ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_add_f32", VGPR_32, f32 >; -let OtherPredicates = [isGFX90APlus] in +let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; @@ -1505,7 +1508,7 @@ defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; } -let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in { +let OtherPred