================
@@ -3787,6 +3790,47 @@ bool
AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{
return true;
}
+bool AMDGPUInstructionSelector::selectTensorLoadStore(MachineInstr &MI,
+ Intrinsic::ID IID) const
{
+ bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
+ unsigned Opc =
+ IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS : AMDGPU::TENSOR_STORE_FROM_LDS;
+ int NumGroups = 4;
+
+ // A lambda function to check whether an operand is a vector of all 0s.
+ const auto isAllZeros = [&](MachineOperand &Opnd) {
+ const MachineInstr *DefMI = MRI->getVRegDef(Opnd.getReg());
+ if (!DefMI)
+ return false;
+ return llvm::isBuildVectorAllZeros(*DefMI, *MRI, true);
+ };
+
+ // Use _D2 version if both group 2 and 3 are zero-initialized.
+ if (isAllZeros(MI.getOperand(3)) && isAllZeros(MI.getOperand(4))) {
+ NumGroups = 2;
+ Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_D2
+ : AMDGPU::TENSOR_STORE_FROM_LDS_D2;
+ }
----------------
tgymnich wrote:
should we also check group 4 here, so that we don't forget to add this once d4
is actually used?
https://github.com/llvm/llvm-project/pull/182334
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits