[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/Sisyph approved this pull request. https://github.com/llvm/llvm-project/pull/179225 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 3e1c3379a7d2f04d2d9ece351ef0f1fb698faacd Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Mon, 23 Mar 2026 13:44:33 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 34 ++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll| 72 +
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 68 ++---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
10 files changed, 181 insertions(+), 334 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab590..f5747488225c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,14 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
+ Mods |= SISrcMods::OP_SEL_1;
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5269,12 +5277,23 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const {
+ MachineRegisterInfo
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 3e1c3379a7d2f04d2d9ece351ef0f1fb698faacd Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Mon, 23 Mar 2026 13:44:33 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 34 ++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll| 72 +
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 68 ++---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
10 files changed, 181 insertions(+), 334 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab590..f5747488225c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,14 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
+ Mods |= SISrcMods::OP_SEL_1;
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5269,12 +5277,23 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const {
+ MachineRegisterInfo
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
github-actions[bot] wrote: # :window: Windows x64 Test Results * 61169 tests passed * 1514 tests skipped All executed tests passed, but another part of the build **failed**. Click on a failure below to see the details. [code=4294967295] unittests/Transforms/Vectorize/VectorizeTests.exe ``` FAILED: [code=4294967295] unittests/Transforms/Vectorize/VectorizeTests.exe C:\Windows\system32\cmd.exe /C "cd . && C:\BuildTools\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe -E vs_link_exe --msvc-ver=1944 --intdir=unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir --rc="C:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0\x64\rc.exe" --mt="C:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0\x64\mt.exe" --manifests -- C:\clang\clang-msvc\bin\lld-link.exe /nologo unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPDomTreeTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPPostDomFrontierTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanHCFGTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanPatternMatchTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanSlpTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanUncountableExitTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanVerifierTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\__\__\__\resources\windows_version_resource.rc.res /out:unittests\Transforms\Vectorize\VectorizeTests.exe /implib:unittests\Transforms\Vectorize\VectorizeTests.lib /pdb:unittests\Transforms\Vectorize\VectorizeTests.pdb /version:0.0 /MANIFEST:NO /STACK:1000 /INCREMENTAL:NO /subsystem:console lib\LLVMAnalysis.lib lib\LLVMCore.lib lib\LLVMVectorize.lib lib\LLVMAsmParser.lib lib\LLVMTargetParser.lib lib\LLVMSupport.lib lib\llvm_gtest_main.lib 
lib\llvm_gtest.lib lib\LLVMTransformUtils.lib lib\LLVMSandboxIR.lib lib\LLVMFrontendHLSL.lib lib\LLVMProfileData.lib lib\LLVMSymbolize.lib lib\LLVMDebugInfoGSYM.lib lib\LLVMDebugInfoPDB.lib "C:\BuildTools\DIA SDK\lib\amd64\diaguids.lib" lib\LLVMDebugInfoCodeView.lib lib\LLVMDebugInfoMSF.lib lib\LLVMDebugInfoBTF.lib lib\LLVMDebugInfoDWARF.lib lib\LLVMObject.lib lib\LLVMIRReader.lib lib\LLVMBitReader.lib lib\LLVMMCParser.lib lib\LLVMMC.lib lib\LLVMTextAPI.lib lib\LLVMDebugInfoDWARFLowLevel.lib lib\LLVMBinaryFormat.lib lib\LLVMRemarks.lib lib\LLVMBitstreamReader.lib lib\LLVMDemangle.lib psapi.lib shell32.lib ole32.lib uuid.lib advapi32.lib ws2_32.lib ntdll.lib delayimp.lib -delayload:shell32.dll -delayload:ole32.dll kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib && cd ." LINK: command "C:\clang\clang-msvc\bin\lld-link.exe /nologo unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPDomTreeTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPPostDomFrontierTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanHCFGTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanPatternMatchTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanSlpTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanUncountableExitTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanVerifierTest.cpp.obj unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\__\__\__\resources\windows_version_resource.rc.res /out:unittests\Transforms\Vectorize\VectorizeTests.exe /implib:unittests\Transforms\Vectorize\VectorizeTests.lib /pdb:unittests\Transforms\Vectorize\VectorizeTests.pdb /version:0.0 /MANIFEST:NO /STACK:1000 /INCREMENTAL:NO /subsystem:console lib\LLVMAnalysis.lib lib\LLVMCore.lib 
lib\LLVMVectorize.lib lib\LLVMAsmParser.lib lib\LLVMTargetParser.lib lib\LLVMSupport.lib lib\llvm_gtest_main.lib lib\llvm_gtest.lib lib\LLVMTransformUtils.lib lib\LLVMSandboxIR.lib lib\LLVMFrontendHLSL.lib lib\LLVMProfileData.lib lib\LLVMSymbolize.lib lib\LLVMDebugInfoGSYM.lib lib\LLVMDebugInfoPDB.lib C:\BuildTools\DIA SDK\lib\amd64\diaguids.lib lib\LLVMDebugInfoCodeView.lib lib\LLVMDebugInfoMSF.lib lib\LLVMDebugInfoBTF.lib lib\LLVMDebugInfoDWARF.lib lib\LLVMObject.lib lib\LLVMIRReader.lib lib\LLVMBitReader.lib lib\LLVMMCParser.lib lib\LLVMMC.lib lib\LLVMTextAPI.lib lib\LLVMDebugInfoDWARFLowLevel.lib lib\LLVMBinaryFormat.lib lib\LLVMRemarks.lib lib\LLVMBitstreamReader.lib lib\LLVMDemangle.lib psapi.lib shell32.lib ole32.lib uuid.lib advapi32.lib ws2_32.lib ntdll.lib delayimp.lib -delayload:shell32.dll -delayload:ole32.dll kernel32.lib user32.lib gdi32.lib winspool.lib sh
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/mbrkusanin approved this pull request. https://github.com/llvm/llvm-project/pull/179225 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 8724ec58a4bbd1ff46303c34518bdc883f7aa929 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Mon, 23 Mar 2026 13:44:33 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 34 ++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll| 72 +
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 68 ++---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
10 files changed, 181 insertions(+), 334 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab590..f5747488225c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,14 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
+ Mods |= SISrcMods::OP_SEL_1;
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5269,12 +5277,23 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const {
+ MachineRegisterInfo
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 8724ec58a4bbd1ff46303c34518bdc883f7aa929 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Mon, 23 Mar 2026 13:44:33 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 34 ++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll| 72 +
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 68 ++---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
10 files changed, 181 insertions(+), 334 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab590..f5747488225c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,14 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
+ Mods |= SISrcMods::OP_SEL_1;
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5269,12 +5277,23 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const {
+ MachineRegisterInfo
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 97f1c601c0de004b0bd23a767a6315103952a5e7 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 19 Mar 2026 12:54:42 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 43 ++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll| 61 +---
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 68 ++---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
10 files changed, 188 insertions(+), 325 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe2..51a8a476bbf7e3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2d..0cb59faf3a4578 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f9..527923698eac21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab5906..54f56419a4ba97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,17 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5270,18 +5281,42 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsF32(MachineO
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 97f1c601c0de004b0bd23a767a6315103952a5e7 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 19 Mar 2026 12:54:42 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 43 ++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll| 61 +---
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 68 ++---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
10 files changed, 188 insertions(+), 325 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe2..51a8a476bbf7e3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2d..0cb59faf3a4578 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f9..527923698eac21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab5906..54f56419a4ba97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,17 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5270,18 +5281,42 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsF32(MachineO
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 9e6266ca9c54dab2d2dc64c1dc464e7634a776f1 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 48 ++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 68 ++---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
9 files changed, 177 insertions(+), 280 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 32487094efab3..54f56419a4ba9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,17 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5269,22 +5280,43 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 9e6266ca9c54dab2d2dc64c1dc464e7634a776f1 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 48 ++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 68 ++---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
9 files changed, 177 insertions(+), 280 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 32487094efab3..54f56419a4ba9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,17 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5269,22 +5280,43 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/mbrkusanin approved this pull request. https://github.com/llvm/llvm-project/pull/179225 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 4d02482bdf260b028a45a8c9f56659404adb12c4 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 48 +++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 34 +--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 226 +-
9 files changed, 157 insertions(+), 216 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 8aba9752e3185..cd75545f76f72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b31134882cffe..c8d7212ac5b56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3670,6 +3681,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d1d43841bca39..ac5ce562e4723 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4519,6 +4519,17 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5225,22 +5236,43 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) cons
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 4d02482bdf260b028a45a8c9f56659404adb12c4 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 48 +++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 +-
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 34 +--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 226 +-
9 files changed, 157 insertions(+), 216 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 8aba9752e3185..cd75545f76f72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b31134882cffe..c8d7212ac5b56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3670,6 +3681,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d1d43841bca39..ac5ce562e4723 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4519,6 +4519,17 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5225,22 +5236,43 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) cons
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From d014c59a6b43feb2f2f59c63222aeca8ae30ef9e Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 48 +++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 ++-
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 34 +--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 208 +-
9 files changed, 159 insertions(+), 196 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 8aba9752e3185..cd75545f76f72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b31134882cffe..c8d7212ac5b56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3670,6 +3681,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b418a54a32297..831bf12836205 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4519,6 +4519,17 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5225,22 +5236,43 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) con
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From d014c59a6b43feb2f2f59c63222aeca8ae30ef9e Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 48 +++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 26 ++-
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 34 +--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 208 +-
9 files changed, 159 insertions(+), 196 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 8aba9752e3185..cd75545f76f72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b31134882cffe..c8d7212ac5b56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3670,6 +3681,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b418a54a32297..831bf12836205 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4519,6 +4519,17 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5225,22 +5236,43 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) con
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 14cb3bad7b0f08f19156a9a9f0388280ce88d405 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Mon, 2 Feb 2026 13:05:03 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 56 -
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 10 +-
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 34 +--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 208 +-
9 files changed, 157 insertions(+), 190 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7a854d7acf84a..fcfd07cc1d0e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 4fdf222abb017..b9694273476f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3674,6 +3685,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 76915549ebdfa..d80f8cd37a104 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4458,6 +4458,21 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+if (Mods & SISrcMods::ABS) {
+ Mods ^= SISrcMods::ABS;
+ Mods |= SISrcMods::NEG_HI;
+}
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5164,26 +5179,43 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+Instructi
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/179225
>From 14cb3bad7b0f08f19156a9a9f0388280ce88d405 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Mon, 2 Feb 2026 13:05:03 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 ++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 56 -
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 10 +-
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 34 +--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 208 +-
9 files changed, 157 insertions(+), 190 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7a854d7acf84a..fcfd07cc1d0e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 4fdf222abb017..b9694273476f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3674,6 +3685,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 76915549ebdfa..d80f8cd37a104 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4458,6 +4458,21 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+if (Mods & SISrcMods::ABS) {
+ Mods ^= SISrcMods::ABS;
+ Mods |= SISrcMods::NEG_HI;
+}
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5164,26 +5179,43 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
+Instructi
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
petar-avramovic wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.com/github/pr/llvm/llvm-project/179225?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#179226** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.com/github/pr/llvm/llvm-project/179226?utm_source=stack-comment-icon)
* **#179225** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.com/github/pr/llvm/llvm-project/179225?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.com/github/pr/llvm/llvm-project/179225?utm_source=stack-comment-view-in-graphite)
* **#179224** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.com/github/pr/llvm/llvm-project/179224?utm_source=stack-comment-icon)
* **#179223** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.com/github/pr/llvm/llvm-project/179223?utm_source=stack-comment-icon)
* `main`

This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment).

https://github.com/llvm/llvm-project/pull/179225
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)
https://github.com/petar-avramovic created
https://github.com/llvm/llvm-project/pull/179225
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
>From 8228474f52a1de46ccf4aa65eb3783dd6b14785c Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Mon, 2 Feb 2026 13:05:03 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16
Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 +++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 56 --
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td| 10 +-
.../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll | 34 ++--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 160 +-
9 files changed, 145 insertions(+), 154 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7a854d7acf84a..fcfd07cc1d0e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_vop3pmodsf32 :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher,
GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 4fdf222abb017..b9694273476f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In,
SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+ if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3674,6 +3685,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In,
SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+ SDValue SrcTmp, SrcModsTmp;
+ SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+ if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1)
{
+Src = SrcTmp;
+return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 76915549ebdfa..d80f8cd37a104 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4458,6 +4458,21 @@ std::pair
AMDGPUInstructionSelector::selectVOP3ModsImpl(
return std::pair(Src, Mods);
}
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+if (Mods & SISrcMods::ABS) {
+ Mods ^= SISrcMods::ABS;
+ Mods |= SISrcMods::NEG_HI;
+}
+ }
+ return std::pair(Src, Mods);
+}
+
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR) const {
@@ -5164,26 +5179,43 @@
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &R
