[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6 of gfx950. (PR #117591)

2024-11-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Nov 25, 10:16 PM EST**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/117591).


https://github.com/llvm/llvm-project/pull/117591
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6 of gfx950. (PR #117591)

2024-11-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/117591

>From 31eb3b6603f10fcda10bf85ec714907d8095c408 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap 
Date: Mon, 8 Apr 2024 01:53:50 -0400
Subject: [PATCH] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6
 of gfx950.

Co-authored-by: Pravin Jagtap 
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |  1 +
 llvm/lib/Target/AMDGPU/VOP3Instructions.td|  8 
 llvm/test/MC/AMDGPU/gfx950_asm_features.s | 22 -
 llvm/test/MC/AMDGPU/gfx950_err.s  | 48 +++
 .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt  | 12 +
 5 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index f20d6526e20b2c..ea36347423c57c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1697,6 +1697,7 @@ class getVALUDstForVT {
VOPDstOperand_t16Lo128),
 VOPDstOperand);
   RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VOPDstOperand,
+  !eq(VT.Size, 512) : VOPDstOperand,
   !eq(VT.Size, 256) : VOPDstOperand,
   !eq(VT.Size, 128) : VOPDstOperand,
   !eq(VT.Size, 64)  : VOPDstOperand,
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 1009f2d9593609..554aff7082010a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -966,6 +966,10 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, 
mayRaiseFPException = 0 in
 let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 
0 in {
   defm V_CVT_SCALEF32_PK32_F32_FP6  : VOP3Inst<"v_cvt_scalef32_pk32_f32_fp6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
   defm V_CVT_SCALEF32_PK32_F32_BF6  : VOP3Inst<"v_cvt_scalef32_pk32_f32_bf6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_F16_FP6  : VOP3Inst<"v_cvt_scalef32_pk32_f16_fp6",  
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_bf16_fp6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_F16_BF6  : VOP3Inst<"v_cvt_scalef32_pk32_f16_bf6",  
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_BF16_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_bf16_bf6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
 }
 
 let SubtargetPredicate = isGFX10Plus in {
@@ -1915,4 +1919,8 @@ defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 
<0x251>;
 let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, 
"v_cvt_scalef32_pk32_f32_fp6">;
 defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3_Real_gfx9<0x257, 
"v_cvt_scalef32_pk32_f32_bf6">;
+defm V_CVT_SCALEF32_PK32_F16_FP6  : VOP3_Real_gfx9<0x260, 
"v_cvt_scalef32_pk32_f16_fp6">;
+defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3_Real_gfx9<0x261, 
"v_cvt_scalef32_pk32_bf16_fp6">;
+defm V_CVT_SCALEF32_PK32_F16_BF6  : VOP3_Real_gfx9<0x262, 
"v_cvt_scalef32_pk32_f16_bf6">;
+defm V_CVT_SCALEF32_PK32_BF16_BF6 : VOP3_Real_gfx9<0x263, 
"v_cvt_scalef32_pk32_bf16_bf6">;
 }
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s 
b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 95d31d2293075f..271ad4d62c3a43 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -892,4 +892,24 @@ v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6
 
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: 
[0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
-v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6
\ No newline at end of file
+v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x62,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x61,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x60,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8
diff --git a/llv

[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6 of gfx950. (PR #117591)

2024-11-25 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/117591
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6 of gfx950. (PR #117591)

2024-11-25 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/117591
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6 of gfx950. (PR #117591)

2024-11-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

Co-authored-by: Pravin Jagtap 

---
Full diff: https://github.com/llvm/llvm-project/pull/117591.diff


5 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) 
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+8) 
- (modified) llvm/test/MC/AMDGPU/gfx950_asm_features.s (+21-1) 
- (modified) llvm/test/MC/AMDGPU/gfx950_err.s (+48) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt (+12) 


```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index f20d6526e20b2c..ea36347423c57c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1697,6 +1697,7 @@ class getVALUDstForVT {
VOPDstOperand_t16Lo128),
 VOPDstOperand);
   RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VOPDstOperand,
+  !eq(VT.Size, 512) : VOPDstOperand,
   !eq(VT.Size, 256) : VOPDstOperand,
   !eq(VT.Size, 128) : VOPDstOperand,
   !eq(VT.Size, 64)  : VOPDstOperand,
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 1009f2d9593609..554aff7082010a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -966,6 +966,10 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, 
mayRaiseFPException = 0 in
 let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 
0 in {
   defm V_CVT_SCALEF32_PK32_F32_FP6  : VOP3Inst<"v_cvt_scalef32_pk32_f32_fp6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
   defm V_CVT_SCALEF32_PK32_F32_BF6  : VOP3Inst<"v_cvt_scalef32_pk32_f32_bf6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_F16_FP6  : VOP3Inst<"v_cvt_scalef32_pk32_f16_fp6",  
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_bf16_fp6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_F16_BF6  : VOP3Inst<"v_cvt_scalef32_pk32_f16_bf6",  
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_BF16_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_bf16_bf6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
 }
 
 let SubtargetPredicate = isGFX10Plus in {
@@ -1915,4 +1919,8 @@ defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 
<0x251>;
 let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, 
"v_cvt_scalef32_pk32_f32_fp6">;
 defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3_Real_gfx9<0x257, 
"v_cvt_scalef32_pk32_f32_bf6">;
+defm V_CVT_SCALEF32_PK32_F16_FP6  : VOP3_Real_gfx9<0x260, 
"v_cvt_scalef32_pk32_f16_fp6">;
+defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3_Real_gfx9<0x261, 
"v_cvt_scalef32_pk32_bf16_fp6">;
+defm V_CVT_SCALEF32_PK32_F16_BF6  : VOP3_Real_gfx9<0x262, 
"v_cvt_scalef32_pk32_f16_bf6">;
+defm V_CVT_SCALEF32_PK32_BF16_BF6 : VOP3_Real_gfx9<0x263, 
"v_cvt_scalef32_pk32_bf16_bf6">;
 }
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s 
b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 95d31d2293075f..271ad4d62c3a43 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -892,4 +892,24 @@ v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6
 
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: 
[0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
-v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6
\ No newline at end of file
+v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x62,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x61,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x60,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index 6eebd4f7ccd76b..cb5b69be744781 100644
--- a/llvm/test

[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6 of gfx950. (PR #117591)

2024-11-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/117591
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6 of gfx950. (PR #117591)

2024-11-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117591?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#117594** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117594?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117593** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117593?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117592** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117592?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117591** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117591?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117591?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#117590** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117590?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117418** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117418?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117417** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117417?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117384** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117384?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117383** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117383?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117382** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117382?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117381** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117381?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117380** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117380?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117379** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117379?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117378** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117378?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117287** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117287?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117286** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117286?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117285** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117285?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117284** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117284?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117283** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117283?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117263** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117263?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117262** https

[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6 of gfx950. (PR #117591)

2024-11-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/117591

Co-authored-by: Pravin Jagtap 

>From 145c4c8611307f4039f390a1a69fad4fe4c14ee3 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap 
Date: Mon, 8 Apr 2024 01:53:50 -0400
Subject: [PATCH] AMDGPU: MC support for v_cvt_scalef32_pk32_{bf|f}16_{bf|fp}6
 of gfx950.

Co-authored-by: Pravin Jagtap 
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |  1 +
 llvm/lib/Target/AMDGPU/VOP3Instructions.td|  8 
 llvm/test/MC/AMDGPU/gfx950_asm_features.s | 22 -
 llvm/test/MC/AMDGPU/gfx950_err.s  | 48 +++
 .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt  | 12 +
 5 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index f20d6526e20b2c..ea36347423c57c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1697,6 +1697,7 @@ class getVALUDstForVT {
VOPDstOperand_t16Lo128),
 VOPDstOperand);
   RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VOPDstOperand,
+  !eq(VT.Size, 512) : VOPDstOperand,
   !eq(VT.Size, 256) : VOPDstOperand,
   !eq(VT.Size, 128) : VOPDstOperand,
   !eq(VT.Size, 64)  : VOPDstOperand,
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 1009f2d9593609..554aff7082010a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -966,6 +966,10 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, 
mayRaiseFPException = 0 in
 let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 
0 in {
   defm V_CVT_SCALEF32_PK32_F32_FP6  : VOP3Inst<"v_cvt_scalef32_pk32_f32_fp6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
   defm V_CVT_SCALEF32_PK32_F32_BF6  : VOP3Inst<"v_cvt_scalef32_pk32_f32_bf6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_F16_FP6  : VOP3Inst<"v_cvt_scalef32_pk32_f16_fp6",  
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_bf16_fp6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_F16_BF6  : VOP3Inst<"v_cvt_scalef32_pk32_f16_bf6",  
VOP3_CVT_SCALEF32_PK_F864_Profile>;
+  defm V_CVT_SCALEF32_PK32_BF16_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_bf16_bf6", 
VOP3_CVT_SCALEF32_PK_F864_Profile>;
 }
 
 let SubtargetPredicate = isGFX10Plus in {
@@ -1915,4 +1919,8 @@ defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 
<0x251>;
 let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, 
"v_cvt_scalef32_pk32_f32_fp6">;
 defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3_Real_gfx9<0x257, 
"v_cvt_scalef32_pk32_f32_bf6">;
+defm V_CVT_SCALEF32_PK32_F16_FP6  : VOP3_Real_gfx9<0x260, 
"v_cvt_scalef32_pk32_f16_fp6">;
+defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3_Real_gfx9<0x261, 
"v_cvt_scalef32_pk32_bf16_fp6">;
+defm V_CVT_SCALEF32_PK32_F16_BF6  : VOP3_Real_gfx9<0x262, 
"v_cvt_scalef32_pk32_f16_bf6">;
+defm V_CVT_SCALEF32_PK32_BF16_BF6 : VOP3_Real_gfx9<0x263, 
"v_cvt_scalef32_pk32_bf16_bf6">;
 }
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s 
b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 95d31d2293075f..271ad4d62c3a43 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -892,4 +892,24 @@ v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6
 
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: 
[0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
-v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6
\ No newline at end of file
+v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x62,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x61,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8 ; encoding: 
[0x0a,0x00,0x60,0xd2,0x14,0x11,0x02,0x00]
+v_cvt_scalef32_pk32_f16_fp6 v[10:25