[llvm-branch-commits] [llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)

2024-11-26 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Nov 26, 2:41 PM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/117737).


https://github.com/llvm/llvm-project/pull/117737
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)

2024-11-26 Thread Shilei Tian via llvm-branch-commits

shiltian wrote:

three operands, alright...

https://github.com/llvm/llvm-project/pull/117737
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)

2024-11-26 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

Because that's what it is? 

https://github.com/llvm/llvm-project/pull/117737
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)

2024-11-26 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.

wonder why it is called `v_maximum3`.

https://github.com/llvm/llvm-project/pull/117737
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)

2024-11-26 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/117737
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)

2024-11-26 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---

Patch is 37.30 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/117737.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/fmaximum3.ll (+50-96) 
- (modified) llvm/test/CodeGen/AMDGPU/fminimum3.ll (+50-96) 


```diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2e0f95161935a9..a24b6430378cc9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13346,7 +13346,8 @@ static bool supportsMin3Max3(const GCNSubtarget 
&Subtarget, unsigned Opc,
 return (VT == MVT::f32) || (VT == MVT::f16 && Subtarget.hasMin3Max3_16());
   case ISD::FMINIMUM:
   case ISD::FMAXIMUM:
-return (VT == MVT::f32 || VT == MVT::f16) && Subtarget.hasIEEEMinMax3();
+return (VT == MVT::f32 && Subtarget.hasMinimum3Maximum3F32()) ||
+   (VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16());
   case ISD::SMAX:
   case ISD::SMIN:
   case ISD::UMAX:
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll 
b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 209ae86b4dedce..e771e5801f2eda 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -31,8 +31,7 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
   %max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -67,8 +66,7 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_commute:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT:v_maximum3_f32 v0, v2, v0, v0
+; GFX950-NEXT:v_maximum3_f32 v0, v2, v0, v1
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
   %max1 = call float @llvm.maximum.f32(float %c, float %max0)
@@ -102,9 +100,9 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float 
inreg %b, float inre
 ;
 ; GFX950-LABEL: s_fmaximum3_f32:
 ; GFX950:   ; %bb.0:
-; GFX950-NEXT:v_mov_b32_e32 v0, s0
-; GFX950-NEXT:v_maximum3_f32 v0, v0, s1, s1
-; GFX950-NEXT:v_maximum3_f32 v0, v0, s2, s2
+; GFX950-NEXT:v_mov_b32_e32 v0, s1
+; GFX950-NEXT:v_mov_b32_e32 v1, s2
+; GFX950-NEXT:v_maximum3_f32 v0, s0, v0, v1
 ; GFX950-NEXT:s_nop 0
 ; GFX950-NEXT:v_readfirstlane_b32 s0, v0
 ; GFX950-NEXT:; return to shader part epilog
@@ -143,8 +141,7 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_fabs0:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, |v0|, v1, v1
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT:v_maximum3_f32 v0, |v0|, v1, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %a.fabs = call float @llvm.fabs.f32(float %a)
   %max0 = call float @llvm.maximum.f32(float %a.fabs, float %b)
@@ -180,8 +177,7 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_fabs1:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, v0, |v1|, |v1|
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT:v_maximum3_f32 v0, v0, |v1|, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %b.fabs = call float @llvm.fabs.f32(float %b)
   %max0 = call float @llvm.maximum.f32(float %a, float %b.fabs)
@@ -217,8 +213,7 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_fabs2:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT:v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, |v2|
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %c.fabs = call float @llvm.fabs.f32(float %c)
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -254,8 +249,7 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_fabs_all:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, |v0|, |v1|, |v1|
-; GFX950-NEXT:v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT:v_maximum3_f32 v0, |v0|, |v1|, |v2|
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %a.fabs = call f

[llvm-branch-commits] [llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)

2024-11-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/117737
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)

2024-11-26 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/117737?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#117739** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117739?utm_source=stack-comment-icon)
* **#117738** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117738?utm_source=stack-comment-icon)
* **#117737** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117737?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/117737?utm_source=stack-comment-view-in-graphite)
* **#117634** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117634?utm_source=stack-comment-icon)
* **#117601** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117601?utm_source=stack-comment-icon)
* **#117600** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117600?utm_source=stack-comment-icon)
* **#117599** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117599?utm_source=stack-comment-icon)
* **#117598** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117598?utm_source=stack-comment-icon)
* **#117597** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117597?utm_source=stack-comment-icon)
* **#117596** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117596?utm_source=stack-comment-icon)
* **#117595** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117595?utm_source=stack-comment-icon)
* **#117594** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117594?utm_source=stack-comment-icon)
* **#117593** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117593?utm_source=stack-comment-icon)
* **#117592** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117592?utm_source=stack-comment-icon)
* **#117591** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117591?utm_source=stack-comment-icon)
* **#117590** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117590?utm_source=stack-comment-icon)
* **#117418** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117418?utm_source=stack-comment-icon)
* **#117417** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117417?utm_source=stack-comment-icon)
* **#117384** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117384?utm_source=stack-comment-icon)
* **#117383** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/117383?utm_source=stack-comment-icon)
* **#117382** https

[llvm-branch-commits] [llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)

2024-11-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/117737

None

>From e109d7dbf889634f1af55769aed6e3f9df11f259 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 23 May 2024 21:23:16 +0200
Subject: [PATCH] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |   3 +-
 llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 146 --
 llvm/test/CodeGen/AMDGPU/fminimum3.ll | 146 --
 3 files changed, 102 insertions(+), 193 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2e0f95161935a9..a24b6430378cc9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13346,7 +13346,8 @@ static bool supportsMin3Max3(const GCNSubtarget 
&Subtarget, unsigned Opc,
 return (VT == MVT::f32) || (VT == MVT::f16 && Subtarget.hasMin3Max3_16());
   case ISD::FMINIMUM:
   case ISD::FMAXIMUM:
-return (VT == MVT::f32 || VT == MVT::f16) && Subtarget.hasIEEEMinMax3();
+return (VT == MVT::f32 && Subtarget.hasMinimum3Maximum3F32()) ||
+   (VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16());
   case ISD::SMAX:
   case ISD::SMIN:
   case ISD::UMAX:
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll 
b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 209ae86b4dedce..e771e5801f2eda 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -31,8 +31,7 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
   %max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -67,8 +66,7 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_commute:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT:v_maximum3_f32 v0, v2, v0, v0
+; GFX950-NEXT:v_maximum3_f32 v0, v2, v0, v1
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
   %max1 = call float @llvm.maximum.f32(float %c, float %max0)
@@ -102,9 +100,9 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float 
inreg %b, float inre
 ;
 ; GFX950-LABEL: s_fmaximum3_f32:
 ; GFX950:   ; %bb.0:
-; GFX950-NEXT:v_mov_b32_e32 v0, s0
-; GFX950-NEXT:v_maximum3_f32 v0, v0, s1, s1
-; GFX950-NEXT:v_maximum3_f32 v0, v0, s2, s2
+; GFX950-NEXT:v_mov_b32_e32 v0, s1
+; GFX950-NEXT:v_mov_b32_e32 v1, s2
+; GFX950-NEXT:v_maximum3_f32 v0, s0, v0, v1
 ; GFX950-NEXT:s_nop 0
 ; GFX950-NEXT:v_readfirstlane_b32 s0, v0
 ; GFX950-NEXT:; return to shader part epilog
@@ -143,8 +141,7 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_fabs0:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, |v0|, v1, v1
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT:v_maximum3_f32 v0, |v0|, v1, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %a.fabs = call float @llvm.fabs.f32(float %a)
   %max0 = call float @llvm.maximum.f32(float %a.fabs, float %b)
@@ -180,8 +177,7 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_fabs1:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, v0, |v1|, |v1|
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT:v_maximum3_f32 v0, v0, |v1|, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %b.fabs = call float @llvm.fabs.f32(float %b)
   %max0 = call float @llvm.maximum.f32(float %a, float %b.fabs)
@@ -217,8 +213,7 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_fabs2:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT:v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT:v_maximum3_f32 v0, v0, v1, |v2|
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %c.fabs = call float @llvm.fabs.f32(float %c)
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -254,8 +249,7 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, 
float %c) {
 ; GFX950-LABEL: v_fmaximum3_f32_fabs_all:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_maximum3_f32 v0, |v0|, |v1|, |v1|
-; GFX950-NEXT:v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-