[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-23 Thread Joe Nash via llvm-branch-commits

https://github.com/Sisyph approved this pull request.


https://github.com/llvm/llvm-project/pull/179225
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-23 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 3e1c3379a7d2f04d2d9ece351ef0f1fb698faacd Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 23 Mar 2026 13:44:33 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  34 ++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll|  72 +
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  68 ++---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
 10 files changed, 181 insertions(+), 334 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab590..f5747488225c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,14 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
+  Mods |= SISrcMods::OP_SEL_1;
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5269,12 +5277,23 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const {
+  MachineRegisterInfo

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-23 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 3e1c3379a7d2f04d2d9ece351ef0f1fb698faacd Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 23 Mar 2026 13:44:33 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  34 ++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll|  72 +
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  68 ++---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
 10 files changed, 181 insertions(+), 334 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab590..f5747488225c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,14 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
+  Mods |= SISrcMods::OP_SEL_1;
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5269,12 +5277,23 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const {
+  MachineRegisterInfo

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-23 Thread via llvm-branch-commits

github-actions[bot] wrote:


# :window: Windows x64 Test Results

* 61169 tests passed
* 1514 tests skipped

All executed tests passed, but another part of the build **failed**. Click on a 
failure below to see the details.


[code=4294967295] 
unittests/Transforms/Vectorize/VectorizeTests.exe

```
FAILED: [code=4294967295] unittests/Transforms/Vectorize/VectorizeTests.exe
C:\Windows\system32\cmd.exe /C "cd . && 
C:\BuildTools\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe 
-E vs_link_exe --msvc-ver=1944 
--intdir=unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir 
--rc="C:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0\x64\rc.exe" 
--mt="C:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0\x64\mt.exe" 
--manifests  -- C:\clang\clang-msvc\bin\lld-link.exe /nologo 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanTest.cpp.obj 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPDomTreeTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPPostDomFrontierTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanHCFGTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanPatternMatchTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanSlpTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanUncountableExitTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanVerifierTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\__\__\__\resources\windows_version_resource.rc.res
  /out:unittests\Transforms\Vectorize\VectorizeTests.exe 
/implib:unittests\Transforms\Vectorize\VectorizeTests.lib 
/pdb:unittests\Transforms\Vectorize\VectorizeTests.pdb /version:0.0 
/MANIFEST:NO /STACK:1000 /INCREMENTAL:NO /subsystem:console  
lib\LLVMAnalysis.lib  lib\LLVMCore.lib  lib\LLVMVectorize.lib  
lib\LLVMAsmParser.lib  lib\LLVMTargetParser.lib  lib\LLVMSupport.lib  
lib\llvm_gtest_main.lib  lib\llvm_gtest.lib  lib\LLVMTransformUtils.lib  
lib\LLVMSandboxIR.lib  lib\LLVMFrontendHLSL.lib  lib\LLVMProfileData.lib  
lib\LLVMSymbolize.lib  lib\LLVMDebugInfoGSYM.lib  lib\LLVMDebugInfoPDB.lib  
"C:\BuildTools\DIA SDK\lib\amd64\diaguids.lib"  lib\LLVMDebugInfoCodeView.lib  
lib\LLVMDebugInfoMSF.lib  lib\LLVMDebugInfoBTF.lib  lib\LLVMDebugInfoDWARF.lib  
lib\LLVMObject.lib  lib\LLVMIRReader.lib  lib\LLVMBitReader.lib  
lib\LLVMMCParser.lib  lib\LLVMMC.lib  lib\LLVMTextAPI.lib  
lib\LLVMDebugInfoDWARFLowLevel.lib  lib\LLVMBinaryFormat.lib  
lib\LLVMRemarks.lib  lib\LLVMBitstreamReader.lib  lib\LLVMDemangle.lib  
psapi.lib  shell32.lib  ole32.lib  uuid.lib  advapi32.lib  ws2_32.lib  
ntdll.lib  delayimp.lib  -delayload:shell32.dll  -delayload:ole32.dll  
kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib 
oleaut32.lib uuid.lib comdlg32.lib advapi32.lib && cd ."
LINK: command "C:\clang\clang-msvc\bin\lld-link.exe /nologo 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanTest.cpp.obj 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPDomTreeTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPPostDomFrontierTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanHCFGTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanPatternMatchTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanSlpTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanUncountableExitTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\VPlanVerifierTest.cpp.obj
 
unittests\Transforms\Vectorize\CMakeFiles\VectorizeTests.dir\__\__\__\resources\windows_version_resource.rc.res
 /out:unittests\Transforms\Vectorize\VectorizeTests.exe 
/implib:unittests\Transforms\Vectorize\VectorizeTests.lib 
/pdb:unittests\Transforms\Vectorize\VectorizeTests.pdb /version:0.0 
/MANIFEST:NO /STACK:1000 /INCREMENTAL:NO /subsystem:console 
lib\LLVMAnalysis.lib lib\LLVMCore.lib lib\LLVMVectorize.lib 
lib\LLVMAsmParser.lib lib\LLVMTargetParser.lib lib\LLVMSupport.lib 
lib\llvm_gtest_main.lib lib\llvm_gtest.lib lib\LLVMTransformUtils.lib 
lib\LLVMSandboxIR.lib lib\LLVMFrontendHLSL.lib lib\LLVMProfileData.lib 
lib\LLVMSymbolize.lib lib\LLVMDebugInfoGSYM.lib lib\LLVMDebugInfoPDB.lib 
C:\BuildTools\DIA SDK\lib\amd64\diaguids.lib lib\LLVMDebugInfoCodeView.lib 
lib\LLVMDebugInfoMSF.lib lib\LLVMDebugInfoBTF.lib lib\LLVMDebugInfoDWARF.lib 
lib\LLVMObject.lib lib\LLVMIRReader.lib lib\LLVMBitReader.lib 
lib\LLVMMCParser.lib lib\LLVMMC.lib lib\LLVMTextAPI.lib 
lib\LLVMDebugInfoDWARFLowLevel.lib lib\LLVMBinaryFormat.lib lib\LLVMRemarks.lib 
lib\LLVMBitstreamReader.lib lib\LLVMDemangle.lib psapi.lib shell32.lib 
ole32.lib uuid.lib advapi32.lib ws2_32.lib ntdll.lib delayimp.lib 
-delayload:shell32.dll -delayload:ole32.dll kernel32.lib user32.lib gdi32.lib 
winspool.lib sh

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-23 Thread Mirko Brkušanin via llvm-branch-commits

https://github.com/mbrkusanin approved this pull request.


https://github.com/llvm/llvm-project/pull/179225
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-23 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 8724ec58a4bbd1ff46303c34518bdc883f7aa929 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 23 Mar 2026 13:44:33 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  34 ++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll|  72 +
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  68 ++---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
 10 files changed, 181 insertions(+), 334 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab590..f5747488225c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,14 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
+  Mods |= SISrcMods::OP_SEL_1;
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5269,12 +5277,23 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const {
+  MachineRegisterInfo

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-23 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 8724ec58a4bbd1ff46303c34518bdc883f7aa929 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 23 Mar 2026 13:44:33 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  34 ++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll|  72 +
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  68 ++---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
 10 files changed, 181 insertions(+), 334 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab590..f5747488225c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,14 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
+  Mods |= SISrcMods::OP_SEL_1;
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5269,12 +5277,23 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const {
+  MachineRegisterInfo

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-19 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 97f1c601c0de004b0bd23a767a6315103952a5e7 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 19 Mar 2026 12:54:42 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  43 ++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll|  61 +---
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  68 ++---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
 10 files changed, 188 insertions(+), 325 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe2..51a8a476bbf7e3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2d..0cb59faf3a4578 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f9..527923698eac21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab5906..54f56419a4ba97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,17 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5270,18 +5281,42 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
 }
 
 InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsF32(MachineO

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-19 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 97f1c601c0de004b0bd23a767a6315103952a5e7 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 19 Mar 2026 12:54:42 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  43 ++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll|  61 +---
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  68 ++---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
 10 files changed, 188 insertions(+), 325 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe2..51a8a476bbf7e3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2d..0cb59faf3a4578 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f9..527923698eac21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 80b30b98ab5906..54f56419a4ba97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,17 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5270,18 +5281,42 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
 }
 
 InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsF32(MachineO

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-18 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 9e6266ca9c54dab2d2dc64c1dc464e7634a776f1 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  48 ++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  68 ++---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
 9 files changed, 177 insertions(+), 280 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 32487094efab3..54f56419a4ba9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,17 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5269,22 +5280,43 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const 

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-18 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 9e6266ca9c54dab2d2dc64c1dc464e7634a776f1 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  48 ++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  68 ++---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 276 --
 9 files changed, 177 insertions(+), 280 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index de8722841d3fe..51a8a476bbf7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 613dcfeb646a2..0cb59faf3a457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3691,6 +3691,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   SelectVOP3Mods(In, Src, SrcMods);
@@ -3700,6 +3711,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8b12d1d2a800f..527923698eac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 32487094efab3..54f56419a4ba9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,17 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5269,22 +5280,43 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const 

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-03-05 Thread Mirko Brkušanin via llvm-branch-commits

https://github.com/mbrkusanin approved this pull request.


https://github.com/llvm/llvm-project/pull/179225
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-02-20 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 4d02482bdf260b028a45a8c9f56659404adb12c4 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  48 +++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  34 +--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 226 +-
 9 files changed, 157 insertions(+), 216 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 8aba9752e3185..cd75545f76f72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b31134882cffe..c8d7212ac5b56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3670,6 +3681,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d1d43841bca39..ac5ce562e4723 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4519,6 +4519,17 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5225,22 +5236,43 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) cons

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-02-20 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 4d02482bdf260b028a45a8c9f56659404adb12c4 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  48 +++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 +-
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  34 +--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 226 +-
 9 files changed, 157 insertions(+), 216 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 8aba9752e3185..cd75545f76f72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b31134882cffe..c8d7212ac5b56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3670,6 +3681,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d1d43841bca39..ac5ce562e4723 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4519,6 +4519,17 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5225,22 +5236,43 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) cons

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-02-12 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From d014c59a6b43feb2f2f59c63222aeca8ae30ef9e Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  48 +++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 ++-
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  34 +--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 208 +-
 9 files changed, 159 insertions(+), 196 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 8aba9752e3185..cd75545f76f72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b31134882cffe..c8d7212ac5b56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3670,6 +3681,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b418a54a32297..831bf12836205 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4519,6 +4519,17 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5225,22 +5236,43 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) con

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-02-12 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From d014c59a6b43feb2f2f59c63222aeca8ae30ef9e Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 12 Feb 2026 17:56:30 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  48 +++-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  26 ++-
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  34 +--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 208 +-
 9 files changed, 159 insertions(+), 196 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 8aba9752e3185..cd75545f76f72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b31134882cffe..c8d7212ac5b56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3670,6 +3681,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) 
{
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b418a54a32297..831bf12836205 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4519,6 +4519,17 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5225,22 +5236,43 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) con

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-02-03 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 14cb3bad7b0f08f19156a9a9f0388280ce88d405 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 2 Feb 2026 13:05:03 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  56 -
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  10 +-
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  34 +--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 208 +-
 9 files changed, 157 insertions(+), 190 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7a854d7acf84a..fcfd07cc1d0e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 4fdf222abb017..b9694273476f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3674,6 +3685,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 76915549ebdfa..d80f8cd37a104 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4458,6 +4458,21 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+if (Mods & SISrcMods::ABS) {
+  Mods ^= SISrcMods::ABS;
+  Mods |= SISrcMods::NEG_HI;
+}
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5164,26 +5179,43 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+Instructi

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-02-03 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/179225

>From 14cb3bad7b0f08f19156a9a9f0388280ce88d405 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 2 Feb 2026 13:05:03 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 ++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  56 -
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  10 +-
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  34 +--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 208 +-
 9 files changed, 157 insertions(+), 190 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7a854d7acf84a..fcfd07cc1d0e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 4fdf222abb017..b9694273476f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3674,6 +3685,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 76915549ebdfa..d80f8cd37a104 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4458,6 +4458,21 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+if (Mods & SISrcMods::ABS) {
+  Mods ^= SISrcMods::ABS;
+  Mods |= SISrcMods::NEG_HI;
+}
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5164,26 +5179,43 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
   return selectVOP3PRetHelper(Root, true);
 }
 
+Instructi

[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-02-02 Thread Petar Avramovic via llvm-branch-commits

petar-avramovic wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.com/github/pr/llvm/llvm-project/179225?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#179226** <a href="https://app.graphite.com/github/pr/llvm/llvm-project/179226?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#179225** <a href="https://app.graphite.com/github/pr/llvm/llvm-project/179225?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.com/github/pr/llvm/llvm-project/179225?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#179224** <a href="https://app.graphite.com/github/pr/llvm/llvm-project/179224?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#179223** <a href="https://app.graphite.com/github/pr/llvm/llvm-project/179223?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/179225
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16 (PR #179225)

2026-02-02 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic created 
https://github.com/llvm/llvm-project/pull/179225

Select VOP2 version when there are no src_modifiers, otherwise VOP3.

>From 8228474f52a1de46ccf4aa65eb3783dd6b14785c Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 2 Feb 2026 13:05:03 +0100
Subject: [PATCH] AMDGPU: Improve codegen for VOP2 v_dot2c_f32_f16/bf16

Select VOP2 version when there are no src_modifiers, otherwise VOP3.
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  22 +++
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |   2 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  56 --
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   5 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   2 +
 llvm/lib/Target/AMDGPU/VOP2Instructions.td|  10 +-
 .../AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll  |  34 ++--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 160 +-
 9 files changed, 145 insertions(+), 154 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7a854d7acf84a..fcfd07cc1d0e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,10 +51,18 @@ def gi_vop3pmodsdot :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsdot :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_vop3pmodsf32 :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
 
+def gi_vop3pnomodsf32 :
+GIComplexOperandMatcher,
+GIComplexPatternEquiv;
+
 def gi_wmmaopselvop3pmods :
 GIComplexOperandMatcher,
 GIComplexPatternEquiv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 4fdf222abb017..b9694273476f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3658,6 +3658,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, 
SDValue &Src,
   return SelectVOP3PMods(In, Src, SrcMods, true);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PMods(In, SrcTmp, SrcModsTmp, true);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, SDValue &Src,
 SDValue &SrcMods) const {
   unsigned Mods = SISrcMods::OP_SEL_1;
@@ -3674,6 +3685,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(SDValue In, 
SDValue &Src,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const {
+  SDValue SrcTmp, SrcModsTmp;
+  SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
+  if (cast<ConstantSDNode>(SrcModsTmp)->getZExtValue() == SISrcMods::OP_SEL_1) {
+Src = SrcTmp;
+return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   SDValue &Src) const {
   const ConstantSDNode *C = cast(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 43550c7ab53f8..5c13072005a3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -233,7 +233,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
bool IsDOT = false) const;
   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsDOT(SDValue In, SDValue &Src) const;
   bool SelectVOP3PModsF32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PNoModsF32(SDValue In, SDValue &Src) const;
 
   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 76915549ebdfa..d80f8cd37a104 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4458,6 +4458,21 @@ std::pair 
AMDGPUInstructionSelector::selectVOP3ModsImpl(
   return std::pair(Src, Mods);
 }
 
+std::pair
+AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
+  unsigned Mods = SISrcMods::OP_SEL_1;
+  if (Subtarget->isGFX11Plus()) {
+unsigned ModsImpl;
+std::tie(Src, ModsImpl) = selectVOP3ModsImpl(Src);
+Mods |= ModsImpl;
+if (Mods & SISrcMods::ABS) {
+  Mods ^= SISrcMods::ABS;
+  Mods |= SISrcMods::NEG_HI;
+}
+  }
+  return std::pair(Src, Mods);
+}
+
 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
 bool ForceVGPR) const {
@@ -5164,26 +5179,43 @@ 
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &R