Module: Mesa
Branch: main
Commit: 3c25edfdb7456648d4226cf08bf62c57c998e894
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c25edfdb7456648d4226cf08bf62c57c998e894

Author: Georg Lehmann <[email protected]>
Date:   Tue Jan  3 22:54:10 2023 +0100

aco: Improve wave64 cycle estimates.

Reviewed-By: Tatsuyuki Ishi <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20507>

---

 src/amd/compiler/aco_statistics.cpp | 50 ++++++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp
index 800d9a91c3a..5662bc7af4c 100644
--- a/src/amd/compiler/aco_statistics.cpp
+++ b/src/amd/compiler/aco_statistics.cpp
@@ -107,13 +107,11 @@ struct perf_info {
 static bool
 is_dual_issue_capable(const Program& program, const Instruction& instruction)
 {
-   if (program.gfx_level < GFX11 || !instruction.isVALU())
+   if (program.gfx_level < GFX11 || !instruction.isVALU() || instruction.isDPP())
       return false;
 
-   /* Currently assumed to be just the instructions that are allowed as both
-    * VOPD X and VOPD Y operation.
-    */
    switch (instruction.opcode) {
+   case aco_opcode::v_fma_f32:
    case aco_opcode::v_fmac_f32:
    case aco_opcode::v_fmaak_f32:
    case aco_opcode::v_fmamk_f32:
@@ -122,10 +120,54 @@ is_dual_issue_capable(const Program& program, const Instruction& instruction)
    case aco_opcode::v_sub_f32:
    case aco_opcode::v_subrev_f32:
    case aco_opcode::v_mul_legacy_f32:
+   case aco_opcode::v_fma_legacy_f32:
+   case aco_opcode::v_fmac_legacy_f32:
+   case aco_opcode::v_fma_mix_f32:
+   case aco_opcode::v_fma_mixlo_f16:
+   case aco_opcode::v_fma_mixhi_f16:
+   case aco_opcode::v_fma_f16:
+   case aco_opcode::v_fmac_f16:
+   case aco_opcode::v_fmaak_f16:
+   case aco_opcode::v_fmamk_f16:
+   case aco_opcode::v_mul_f16:
+   case aco_opcode::v_add_f16:
+   case aco_opcode::v_sub_f16:
+   case aco_opcode::v_subrev_f16:
    case aco_opcode::v_mov_b32:
+   case aco_opcode::v_movreld_b32:
+   case aco_opcode::v_movrels_b32:
+   case aco_opcode::v_movrelsd_b32:
+   case aco_opcode::v_movrelsd_2_b32:
    case aco_opcode::v_cndmask_b32:
+   case aco_opcode::v_writelane_b32_e64:
+   case aco_opcode::v_mov_b16:
+   case aco_opcode::v_cndmask_b16:
    case aco_opcode::v_max_f32:
    case aco_opcode::v_min_f32:
+   case aco_opcode::v_max_f16:
+   case aco_opcode::v_min_f16:
+   case aco_opcode::v_max_i16_e64:
+   case aco_opcode::v_min_i16_e64:
+   case aco_opcode::v_max_u16_e64:
+   case aco_opcode::v_min_u16_e64:
+   case aco_opcode::v_add_i16:
+   case aco_opcode::v_sub_i16:
+   case aco_opcode::v_mad_i16:
+   case aco_opcode::v_add_u16_e64:
+   case aco_opcode::v_sub_u16_e64:
+   case aco_opcode::v_mad_u16:
+   case aco_opcode::v_mul_lo_u16_e64:
+   case aco_opcode::v_not_b16:
+   case aco_opcode::v_and_b16:
+   case aco_opcode::v_or_b16:
+   case aco_opcode::v_xor_b16:
+   case aco_opcode::v_lshrrev_b16_e64:
+   case aco_opcode::v_ashrrev_i16_e64:
+   case aco_opcode::v_lshlrev_b16_e64:
+   case aco_opcode::v_dot2_bf16_bf16:
+   case aco_opcode::v_dot2_f32_bf16:
+   case aco_opcode::v_dot2_f16_f16:
+   case aco_opcode::v_dot2_f32_f16:
    case aco_opcode::v_dot2c_f32_f16: return true;
    default: return false;
    }

Reply via email to