Module: Mesa
Branch: main
Commit: df87c593f8a55f0a95359dc10bb4652b9ba19cde
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=df87c593f8a55f0a95359dc10bb4652b9ba19cde

Author: Marek Olšák <marek.ol...@amd.com>
Date:   Mon Oct 23 22:22:49 2023 -0400

radeonsi: rewrite PM4 packet building helpers with less duplication

First, the following universal helpers are defined:
- radeon_set_reg_seq
- radeon_set_reg
- radeon_opt_set_reg
- radeon_opt_set_reg2
- radeon_opt_set_reg3
- radeon_opt_set_reg4
- radeon_opt_set_reg5
- radeon_opt_set_regn
- gfx11_push_sh_reg
- gfx11_opt_push_sh_reg

Then the config, context, sh, uconfig, push_gfx and push_compute helpers
are implemented by calling the above.

Many macros used to receive sctx via a parameter; they now reference sctx
directly in the macro body (and the parameter is renamed to "_unused").

The only functional change is that packets writing perfctr registers, which
incorrectly set the predicate bit, now correctly set the RESET_FILTER_CAM bit.

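The helpers no longer check info.uses_kernel_cu_mask; the callers in
si_state_shaders.cpp now check it and choose between the _idx and non-_idx
variants themselves.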

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26095>

---

 src/gallium/drivers/radeonsi/si_build_pm4.h       | 481 +++++++++-------------
 src/gallium/drivers/radeonsi/si_perfcounter.c     |  12 +-
 src/gallium/drivers/radeonsi/si_sqtt.c            |   6 +-
 src/gallium/drivers/radeonsi/si_state_shaders.cpp |  45 +-
 4 files changed, 225 insertions(+), 319 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h 
b/src/gallium/drivers/radeonsi/si_build_pm4.h
index eba45c473f9..3f45226dcb4 100644
--- a/src/gallium/drivers/radeonsi/si_build_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
@@ -36,10 +36,10 @@
 #define radeon_emit(value)  __cs_buf[__cs_num++] = (value)
 #define radeon_packets_added()  (__cs_num != __cs_num_initial)
 
-#define radeon_end_update_context_roll(sctx) do { \
+#define radeon_end_update_context_roll(_unused) do { \
    radeon_end(); \
    if (radeon_packets_added()) \
-      (sctx)->context_roll = true; \
+      sctx->context_roll = true; \
 } while (0)
 
 #define radeon_emit_array(values, num) do { \
@@ -56,337 +56,197 @@
    __cs_num += (num); \
 } while (0)
 
-#define radeon_set_config_reg_seq(reg, num) do { \
-   assert((reg) < SI_CONTEXT_REG_OFFSET); \
-   radeon_emit(PKT3(PKT3_SET_CONFIG_REG, num, 0)); \
-   radeon_emit(((reg) - SI_CONFIG_REG_OFFSET) >> 2); \
+/* Packet building helpers. Don't use directly. */
+#define radeon_set_reg_seq(reg, num, idx, prefix_name, packet, 
reset_filter_cam) do { \
+   assert((reg) >= prefix_name##_REG_OFFSET && (reg) < prefix_name##_REG_END); 
\
+   radeon_emit(PKT3(packet, num, 0) | 
PKT3_RESET_FILTER_CAM_S(reset_filter_cam)); \
+   radeon_emit((((reg) - prefix_name##_REG_OFFSET) >> 2) | ((idx) << 28)); \
 } while (0)
 
-#define radeon_set_config_reg(reg, value) do { \
-   radeon_set_config_reg_seq(reg, 1); \
+#define radeon_set_reg(reg, idx, value, prefix_name, packet) do { \
+   radeon_set_reg_seq(reg, 1, idx, prefix_name, packet, 0); \
    radeon_emit(value); \
 } while (0)
 
-#define radeon_set_context_reg_seq(reg, num) do { \
-   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
-   radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \
-   radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
+#define radeon_opt_set_reg(reg, reg_enum, idx, value, prefix_name, packet, 
category) do { \
+   unsigned __value = (value); \
+   if (!((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0x1) 
|| \
+       sctx->tracked_regs.category##_reg_value[(reg_enum)] != __value) { \
+      radeon_set_reg(reg, idx, __value, prefix_name, packet); \
+      sctx->tracked_regs.category##_reg_saved_mask |= 
BITFIELD64_BIT(reg_enum); \
+      sctx->tracked_regs.category##_reg_value[(reg_enum)] = __value; \
+   } \
 } while (0)
 
-#define radeon_set_context_reg(reg, value) do { \
-   radeon_set_context_reg_seq(reg, 1); \
-   radeon_emit(value); \
+/* Set consecutive registers if any value is different. */
+#define radeon_opt_set_reg2(reg, reg_enum, v1, v2, prefix_name, packet, 
category) do { \
+   unsigned __v1 = (v1), __v2 = (v2); \
+   if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0x3) != 
0x3 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2) { \
+      radeon_set_reg_seq(reg, 2, 0, prefix_name, packet, 0); \
+      radeon_emit(__v1); \
+      radeon_emit(__v2); \
+      sctx->tracked_regs.category##_reg_saved_mask |= 
BITFIELD64_RANGE(reg_enum, 2); \
+      sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
+   } \
 } while (0)
 
-#define radeon_set_context_reg_seq_array(reg, num, values) do { \
-   radeon_set_context_reg_seq(reg, num); \
-   radeon_emit_array(values, num); \
+#define radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, prefix_name, packet, 
category) do { \
+   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3); \
+   if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0x7) != 
0x7 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] != __v3) { \
+      radeon_set_reg_seq(reg, 3, 0, prefix_name, packet, 0); \
+      radeon_emit(__v1); \
+      radeon_emit(__v2); \
+      radeon_emit(__v3); \
+      sctx->tracked_regs.category##_reg_saved_mask |= 
BITFIELD64_RANGE(reg_enum, 3); \
+      sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] = __v3; \
+   } \
 } while (0)
 
-#define radeon_set_context_reg_idx(reg, idx, value) do { \
-   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
-   radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
-   radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2 | ((idx) << 28)); \
-   radeon_emit(value); \
+#define radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, 
packet, category) do { \
+   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4); \
+   if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0xf) != 
0xf || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] != __v3 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] != __v4) { \
+      radeon_set_reg_seq(reg, 4, 0, prefix_name, packet, 0); \
+      radeon_emit(__v1); \
+      radeon_emit(__v2); \
+      radeon_emit(__v3); \
+      radeon_emit(__v4); \
+      sctx->tracked_regs.category##_reg_saved_mask |= 
BITFIELD64_RANGE(reg_enum, 4); \
+      sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] = __v3; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] = __v4; \
+   } \
 } while (0)
 
-#define radeon_set_sh_reg_seq(reg, num) do { \
-   assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
-   radeon_emit(PKT3(PKT3_SET_SH_REG, num, 0)); \
-   radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
+#define radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, prefix_name, 
packet, category) do { \
+   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4), __v5 = (v5); \
+   if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0x1f) 
!= 0x1f || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] != __v3 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] != __v4 || \
+       sctx->tracked_regs.category##_reg_value[(reg_enum) + 4] != __v5) { \
+      radeon_set_reg_seq(reg, 5, 0, prefix_name, packet, 0); \
+      radeon_emit(__v1); \
+      radeon_emit(__v2); \
+      radeon_emit(__v3); \
+      radeon_emit(__v4); \
+      radeon_emit(__v5); \
+      sctx->tracked_regs.category##_reg_saved_mask |= 
BITFIELD64_RANGE(reg_enum, 5); \
+      sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] = __v3; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] = __v4; \
+      sctx->tracked_regs.category##_reg_value[(reg_enum) + 4] = __v5; \
+   } \
 } while (0)
 
-#define radeon_set_sh_reg_idx_seq(sctx, reg, idx, num) do { \
-   assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
-   if ((sctx)->screen->info.uses_kernel_cu_mask) { \
-      assert((sctx)->gfx_level >= GFX10); \
-      radeon_emit(PKT3(PKT3_SET_SH_REG_INDEX, num, 0)); \
-      radeon_emit((((reg) - SI_SH_REG_OFFSET) >> 2) | ((idx) << 28)); \
-   } else { \
-      radeon_emit(PKT3(PKT3_SET_SH_REG, num, 0)); \
-      radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
+#define radeon_opt_set_regn(reg, values, saved_values, num, prefix_name, 
packet) do { \
+   if (memcmp(values, saved_values, sizeof(uint32_t) * (num))) { \
+      radeon_set_reg_seq(reg, num, 0, prefix_name, packet, 0); \
+      radeon_emit_array(values, num); \
+      memcpy(saved_values, values, sizeof(uint32_t) * (num)); \
    } \
 } while (0)
 
-#define radeon_set_sh_reg(reg, value) do { \
-   radeon_set_sh_reg_seq(reg, 1); \
-   radeon_emit(value); \
-} while (0)
+/* Packet building helpers for CONFIG registers. */
+#define radeon_set_config_reg(reg, value) \
+   radeon_set_reg(reg, 0, value, SI_CONFIG, PKT3_SET_CONFIG_REG)
 
-#define radeon_set_sh_reg_idx(sctx, reg, idx, value) do { \
-   radeon_set_sh_reg_idx_seq(sctx, reg, idx, 1); \
-   radeon_emit(value); \
-} while (0)
+/* Packet building helpers for CONTEXT registers. */
+/* TODO: Remove the _unused parameters everywhere. */
+#define radeon_set_context_reg_seq(reg, num) \
+   radeon_set_reg_seq(reg, num, 0, SI_CONTEXT, PKT3_SET_CONTEXT_REG, 0)
 
-#define radeon_push_gfx_sh_reg(reg, value) do { \
-   unsigned __i = sctx->num_buffered_gfx_sh_regs++; \
-   assert(__i / 2 < ARRAY_SIZE(sctx->buffered_gfx_sh_regs)); \
-   sctx->buffered_gfx_sh_regs[__i / 2].reg_offset[__i % 2] = ((reg) - 
SI_SH_REG_OFFSET) >> 2; \
-   sctx->buffered_gfx_sh_regs[__i / 2].reg_value[__i % 2] = value; \
-} while (0)
+#define radeon_set_context_reg(reg, value) \
+   radeon_set_reg(reg, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
 
-#define radeon_push_compute_sh_reg(reg, value) do { \
-   unsigned __i = sctx->num_buffered_compute_sh_regs++; \
-   assert(__i / 2 < ARRAY_SIZE(sctx->buffered_compute_sh_regs)); \
-   sctx->buffered_compute_sh_regs[__i / 2].reg_offset[__i % 2] = ((reg) - 
SI_SH_REG_OFFSET) >> 2; \
-   sctx->buffered_compute_sh_regs[__i / 2].reg_value[__i % 2] = value; \
-} while (0)
+#define radeon_opt_set_context_reg(_unused, reg, reg_enum, value) \
+   radeon_opt_set_reg(reg, reg_enum, 0, value, SI_CONTEXT, 
PKT3_SET_CONTEXT_REG, context)
 
-#define radeon_set_or_push_gfx_sh_reg(reg, value) do { \
-   if (HAS_PAIRS) { \
-      radeon_push_gfx_sh_reg(reg, value); \
-   } else { \
-      radeon_set_sh_reg_seq(reg, 1); \
-      radeon_emit(value); \
-   } \
-} while (0)
+#define radeon_opt_set_context_reg_idx(_unused, reg, reg_enum, idx, value) \
+   radeon_opt_set_reg(reg, reg_enum, idx, value, SI_CONTEXT, 
PKT3_SET_CONTEXT_REG, context)
 
-#define radeon_opt_push_gfx_sh_reg(offset, reg, val) do { \
-   unsigned __value = val; \
-   unsigned __reg = reg; \
-   if (((sctx->tracked_regs.other_reg_saved_mask >> (__reg)) & 0x1) != 0x1 || \
-       sctx->tracked_regs.other_reg_value[__reg] != __value) { \
-      radeon_push_gfx_sh_reg(offset, __value); \
-      sctx->tracked_regs.other_reg_saved_mask |= BITFIELD_BIT(__reg); \
-      sctx->tracked_regs.other_reg_value[__reg] = __value; \
-   } \
-} while (0)
+#define radeon_opt_set_context_reg2(_unused, reg, reg_enum, v1, v2) \
+   radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_CONTEXT, 
PKT3_SET_CONTEXT_REG, context)
 
-#define radeon_opt_push_compute_sh_reg(offset, reg, val) do { \
-   unsigned __value = val; \
-   if (((sctx->tracked_regs.other_reg_saved_mask >> (reg)) & 0x1) != 0x1 || \
-       sctx->tracked_regs.other_reg_value[reg] != __value) { \
-      radeon_push_compute_sh_reg(offset, __value); \
-      sctx->tracked_regs.other_reg_saved_mask |= BITFIELD_BIT(reg); \
-      sctx->tracked_regs.other_reg_value[reg] = __value; \
-   } \
-} while (0)
+#define radeon_opt_set_context_reg3(_unused, reg, reg_enum, v1, v2, v3) \
+   radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_CONTEXT, 
PKT3_SET_CONTEXT_REG, context)
 
-#define radeon_set_uconfig_reg_seq(reg, num, perfctr) do { \
-   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
-   radeon_emit(PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \
-   radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \
-} while (0)
+#define radeon_opt_set_context_reg4(_unused, reg, reg_enum, v1, v2, v3, v4) \
+   radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, 
PKT3_SET_CONTEXT_REG, context)
 
-#define radeon_set_uconfig_reg(reg, value) do { \
-   radeon_set_uconfig_reg_seq(reg, 1, false); \
-   radeon_emit(value); \
-} while (0)
+#define radeon_opt_set_context_reg5(_unused, reg, reg_enum, v1, v2, v3, v4, 
v5) \
+   radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, SI_CONTEXT, 
PKT3_SET_CONTEXT_REG, context)
 
-#define radeon_set_uconfig_reg_perfctr(reg, value) do { \
-   radeon_set_uconfig_reg_seq(reg, 1, true); \
-   radeon_emit(value); \
-} while (0)
+#define radeon_opt_set_context_regn(_unused, reg, values, saved_values, num) \
+   radeon_opt_set_regn(reg, values, saved_values, num, SI_CONTEXT, 
PKT3_SET_CONTEXT_REG)
 
-#define radeon_set_uconfig_reg_idx(screen, gfx_level, reg, idx, value) do { \
-   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
-   assert((idx) != 0); \
-   unsigned __opcode = PKT3_SET_UCONFIG_REG_INDEX; \
-   if ((gfx_level) < GFX9 || \
-       ((gfx_level) == GFX9 && (screen)->info.me_fw_version < 26)) \
-      __opcode = PKT3_SET_UCONFIG_REG; \
-   radeon_emit(PKT3(__opcode, 1, 0)); \
-   radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2 | ((idx) << 28)); \
-   radeon_emit(value); \
-} while (0)
+/* Packet building helpers for SH registers. */
+#define radeon_set_sh_reg_seq(reg, num) \
+   radeon_set_reg_seq(reg, num, 0, SI_SH, PKT3_SET_SH_REG, 0)
 
-/* Emit PKT3_SET_CONTEXT_REG if the register value is different. */
-#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
-   unsigned __value = val; \
-   if (((sctx->tracked_regs.context_reg_saved_mask >> (reg)) & 0x1) != 0x1 || \
-       sctx->tracked_regs.context_reg_value[reg] != __value) { \
-      radeon_set_context_reg(offset, __value); \
-      sctx->tracked_regs.context_reg_saved_mask |= 0x1ull << (reg); \
-      sctx->tracked_regs.context_reg_value[reg] = __value; \
-   } \
-} while (0)
+#define radeon_set_sh_reg(reg, value) \
+   radeon_set_reg(reg, 0, value, SI_SH, PKT3_SET_SH_REG)
 
-#define radeon_opt_set_context_reg_idx(sctx, offset, reg, idx, val) do { \
-   unsigned __value = val; \
-   if (((sctx->tracked_regs.context_reg_saved_mask >> (reg)) & 0x1) != 0x1 || \
-       sctx->tracked_regs.context_reg_value[reg] != __value) { \
-      radeon_set_context_reg_idx(offset, idx, __value); \
-      sctx->tracked_regs.context_reg_saved_mask |= 0x1ull << (reg); \
-      sctx->tracked_regs.context_reg_value[reg] = __value; \
-   } \
-} while (0)
+#define radeon_opt_set_sh_reg(_unused, reg, reg_enum, value) \
+   radeon_opt_set_reg(reg, reg_enum, 0, value, SI_SH, PKT3_SET_SH_REG, other)
 
-/**
- * Set 2 consecutive registers if any registers value is different.
- * @param offset        starting register offset
- * @param val1          is written to first register
- * @param val2          is written to second register
- */
-#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \
-   unsigned __value1 = (val1), __value2 = (val2); \
-   if (((sctx->tracked_regs.context_reg_saved_mask >> (reg)) & 0x3) != 0x3 || \
-       sctx->tracked_regs.context_reg_value[reg] != __value1 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 1] != __value2) { \
-      radeon_set_context_reg_seq(offset, 2); \
-      radeon_emit(__value1); \
-      radeon_emit(__value2); \
-      sctx->tracked_regs.context_reg_value[reg] = __value1; \
-      sctx->tracked_regs.context_reg_value[(reg) + 1] = __value2; \
-      sctx->tracked_regs.context_reg_saved_mask |= 0x3ull << (reg); \
-   } \
-} while (0)
+#define radeon_opt_set_sh_reg2(_unused, reg, reg_enum, v1, v2) \
+   radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_SH, PKT3_SET_SH_REG, other)
 
-/**
- * Set 3 consecutive registers if any registers value is different.
- */
-#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \
-   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \
-   if (((sctx->tracked_regs.context_reg_saved_mask >> (reg)) & 0x7) != 0x7 || \
-       sctx->tracked_regs.context_reg_value[reg] != __value1 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 1] != __value2 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 2] != __value3) { \
-      radeon_set_context_reg_seq(offset, 3); \
-      radeon_emit(__value1); \
-      radeon_emit(__value2); \
-      radeon_emit(__value3); \
-      sctx->tracked_regs.context_reg_value[reg] = __value1; \
-      sctx->tracked_regs.context_reg_value[(reg) + 1] = __value2; \
-      sctx->tracked_regs.context_reg_value[(reg) + 2] = __value3; \
-      sctx->tracked_regs.context_reg_saved_mask |= 0x7ull << (reg); \
-   } \
-} while (0)
+#define radeon_opt_set_sh_reg3(_unused, reg, reg_enum, v1, v2, v3) \
+   radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_SH, PKT3_SET_SH_REG, 
other)
 
-/**
- * Set 4 consecutive registers if any registers value is different.
- */
-#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) 
do { \
-   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 
= (val4); \
-   if (((sctx->tracked_regs.context_reg_saved_mask >> (reg)) & 0xf) != 0xf || \
-       sctx->tracked_regs.context_reg_value[reg] != __value1 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 1] != __value2 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 2] != __value3 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 3] != __value4) { \
-      radeon_set_context_reg_seq(offset, 4); \
-      radeon_emit(__value1); \
-      radeon_emit(__value2); \
-      radeon_emit(__value3); \
-      radeon_emit(__value4); \
-      sctx->tracked_regs.context_reg_value[reg] = __value1; \
-      sctx->tracked_regs.context_reg_value[(reg) + 1] = __value2; \
-      sctx->tracked_regs.context_reg_value[(reg) + 2] = __value3; \
-      sctx->tracked_regs.context_reg_value[(reg) + 3] = __value4; \
-      sctx->tracked_regs.context_reg_saved_mask |= 0xfull << (reg); \
-   } \
+#define radeon_opt_set_sh_reg_idx(_unused, reg, reg_enum, idx, value) do { \
+   assert(sctx->gfx_level >= GFX10); \
+   radeon_opt_set_reg(reg, reg_enum, idx, value, SI_SH, PKT3_SET_SH_REG_INDEX, 
other); \
 } while (0)
 
-/**
- * Set 5 consecutive registers if any register value is different.
- */
-#define radeon_opt_set_context_reg5(sctx, offset, reg, val0, val1, val2, val3, 
val4) do { \
-   unsigned __value0 = (val0), __value1 = (val1), __value2 = (val2), __value3 
= (val3), __value4 = (val4); \
-   if (((sctx->tracked_regs.context_reg_saved_mask >> (reg)) & 0x1f) != 0x1f 
|| \
-       sctx->tracked_regs.context_reg_value[(reg) + 0] != __value0 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 1] != __value1 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 2] != __value2 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 3] != __value3 || \
-       sctx->tracked_regs.context_reg_value[(reg) + 4] != __value4) { \
-      radeon_set_context_reg_seq(offset, 5); \
-      radeon_emit(__value0); \
-      radeon_emit(__value1); \
-      radeon_emit(__value2); \
-      radeon_emit(__value3); \
-      radeon_emit(__value4); \
-      sctx->tracked_regs.context_reg_value[(reg) + 0] = __value0; \
-      sctx->tracked_regs.context_reg_value[(reg) + 1] = __value1; \
-      sctx->tracked_regs.context_reg_value[(reg) + 2] = __value2; \
-      sctx->tracked_regs.context_reg_value[(reg) + 3] = __value3; \
-      sctx->tracked_regs.context_reg_value[(reg) + 4] = __value4; \
-      sctx->tracked_regs.context_reg_saved_mask |= 0x1full << (reg); \
-   } \
+#define radeon_emit_32bit_pointer(_unused, va) do { \
+   assert((va) == 0 || ((va) >> 32) == sctx->screen->info.address32_hi); \
+   radeon_emit(va); \
 } while (0)
 
-/**
- * Set consecutive registers if any registers value is different.
- */
-#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) do { \
-   if (memcmp(value, saved_val, sizeof(uint32_t) * (num))) { \
-      radeon_set_context_reg_seq(offset, num); \
-      radeon_emit_array(value, num); \
-      memcpy(saved_val, value, sizeof(uint32_t) * (num)); \
-   } \
+#define radeon_emit_one_32bit_pointer(_unused, desc, sh_base) do { \
+   radeon_set_sh_reg_seq((sh_base) + (desc)->shader_userdata_offset, 1); \
+   radeon_emit_32bit_pointer(_unused, (desc)->gpu_address); \
 } while (0)
 
-#define radeon_opt_set_sh_reg(sctx, offset, reg, val) do { \
-   unsigned __value = val; \
-   if (((sctx->tracked_regs.other_reg_saved_mask >> (reg)) & 0x1) != 0x1 || \
-       sctx->tracked_regs.other_reg_value[reg] != __value) { \
-      radeon_set_sh_reg(offset, __value); \
-      sctx->tracked_regs.other_reg_saved_mask |= BITFIELD_BIT(reg); \
-      sctx->tracked_regs.other_reg_value[reg] = __value; \
-   } \
-} while (0)
+/* Packet building helpers for UCONFIG registers. */
+#define radeon_set_uconfig_reg_seq(reg, num) \
+   radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, 0)
 
-/**
- * Set 2 consecutive registers if any register value is different.
- */
-#define radeon_opt_set_sh_reg2(sctx, offset, reg, val1, val2) do { \
-   unsigned __value1 = (val1), __value2 = (val2); \
-   if (((sctx->tracked_regs.other_reg_saved_mask >> (reg)) & 0x3) != 0x3 || \
-       sctx->tracked_regs.other_reg_value[reg] != __value1 || \
-       sctx->tracked_regs.other_reg_value[(reg) + 1] != __value2) { \
-      radeon_set_sh_reg_seq(offset, 2); \
-      radeon_emit(__value1); \
-      radeon_emit(__value2); \
-      sctx->tracked_regs.other_reg_value[reg] = __value1; \
-      sctx->tracked_regs.other_reg_value[(reg) + 1] = __value2; \
-      sctx->tracked_regs.other_reg_saved_mask |= 0x3ull << (reg); \
-   } \
-} while (0)
+#define radeon_set_uconfig_perfctr_reg_seq(reg, num) \
+   radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, 
sctx->gfx_level >= GFX10)
 
-/**
- * Set 3 consecutive registers if any register value is different.
- */
-#define radeon_opt_set_sh_reg3(sctx, offset, reg, val1, val2, val3) do { \
-   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \
-   if (((sctx->tracked_regs.other_reg_saved_mask >> (reg)) & 0x7) != 0x7 || \
-       sctx->tracked_regs.other_reg_value[reg] != __value1 || \
-       sctx->tracked_regs.other_reg_value[(reg) + 1] != __value2 || \
-       sctx->tracked_regs.other_reg_value[(reg) + 2] != __value3) { \
-      radeon_set_sh_reg_seq(offset, 3); \
-      radeon_emit(__value1); \
-      radeon_emit(__value2); \
-      radeon_emit(__value3); \
-      sctx->tracked_regs.other_reg_value[reg] = __value1; \
-      sctx->tracked_regs.other_reg_value[(reg) + 1] = __value2; \
-      sctx->tracked_regs.other_reg_value[(reg) + 2] = __value3; \
-      sctx->tracked_regs.other_reg_saved_mask |= 0x7ull << (reg); \
-   } \
-} while (0)
+#define radeon_set_uconfig_reg(reg, value) \
+   radeon_set_reg(reg, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)
 
-#define radeon_opt_set_sh_reg_idx(sctx, offset, reg, idx, val) do { \
-   unsigned __value = val; \
-   if (((sctx->tracked_regs.other_reg_saved_mask >> (reg)) & 0x1) != 0x1 || \
-       sctx->tracked_regs.other_reg_value[reg] != __value) { \
-      radeon_set_sh_reg_idx(sctx, offset, idx, __value); \
-      sctx->tracked_regs.other_reg_saved_mask |= BITFIELD_BIT(reg); \
-      sctx->tracked_regs.other_reg_value[reg] = __value; \
-   } \
-} while (0)
+#define radeon_opt_set_uconfig_reg(_unused, reg, reg_enum, value) \
+   radeon_opt_set_reg(reg, reg_enum, 0, value, CIK_UCONFIG, 
PKT3_SET_UCONFIG_REG, other)
 
-#define radeon_opt_set_uconfig_reg(sctx, offset, reg, val) do { \
-   unsigned __value = val; \
-   if (((sctx->tracked_regs.other_reg_saved_mask >> (reg)) & 0x1) != 0x1 || \
-       sctx->tracked_regs.other_reg_value[reg] != __value) { \
-      radeon_set_uconfig_reg(offset, __value); \
-      sctx->tracked_regs.other_reg_saved_mask |= 0x1ull << (reg); \
-      sctx->tracked_regs.other_reg_value[reg] = __value; \
-   } \
-} while (0)
+#define RESOLVE_PKT3_SET_UCONFIG_REG_INDEX \
+   (GFX_VERSION >= GFX10 || (GFX_VERSION == GFX9 && 
sctx->screen->info.me_fw_version >= 26) ? \
+    PKT3_SET_UCONFIG_REG_INDEX : PKT3_SET_UCONFIG_REG)
 
-#define radeon_opt_set_uconfig_reg_idx(sctx, gfx_level, offset, reg, idx, val) 
do { \
-   unsigned __value = val; \
-   if (((sctx->tracked_regs.other_reg_saved_mask >> (reg)) & 0x1) != 0x1 || \
-       sctx->tracked_regs.other_reg_value[reg] != __value) { \
-      radeon_set_uconfig_reg_idx((sctx)->screen, gfx_level, offset, idx, 
__value); \
-      sctx->tracked_regs.other_reg_saved_mask |= 0x1ull << (reg); \
-      sctx->tracked_regs.other_reg_value[reg] = __value; \
-   } \
-} while (0)
+#define radeon_set_uconfig_reg_idx(_unused, _unused2, reg, idx, value) \
+   radeon_set_reg(reg, idx, value, CIK_UCONFIG, 
RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)
+
+#define radeon_opt_set_uconfig_reg_idx(_unused, _unused2, reg, reg_enum, idx, 
value) \
+   radeon_opt_set_reg(reg, reg_enum, idx, value, CIK_UCONFIG, 
RESOLVE_PKT3_SET_UCONFIG_REG_INDEX, other)
 
 #define radeon_set_privileged_config_reg(reg, value) do { \
    assert((reg) < CIK_UCONFIG_REG_OFFSET); \
@@ -399,15 +259,44 @@
    radeon_emit(0); /* unused */ \
 } while (0)
 
-#define radeon_emit_32bit_pointer(sscreen, va) do { \
-   radeon_emit(va); \
-   assert((va) == 0 || ((va) >> 32) == sscreen->info.address32_hi); \
+/* GFX11 generic packet building helpers for buffered SH registers. Don't use 
these directly. */
+#define gfx11_push_sh_reg(reg, value, type) do { \
+   unsigned __i = sctx->num_buffered_##type##_sh_regs++; \
+   assert(__i / 2 < ARRAY_SIZE(sctx->buffered_##type##_sh_regs)); \
+   sctx->buffered_##type##_sh_regs[__i / 2].reg_offset[__i % 2] = ((reg) - 
SI_SH_REG_OFFSET) >> 2; \
+   sctx->buffered_##type##_sh_regs[__i / 2].reg_value[__i % 2] = value; \
+} while (0)
+
+#define gfx11_opt_push_sh_reg(reg, reg_enum, value, type) do { \
+   unsigned __value = value; \
+   if (((sctx->tracked_regs.other_reg_saved_mask >> (reg_enum)) & 0x1) != 0x1 
|| \
+       sctx->tracked_regs.other_reg_value[reg_enum] != __value) { \
+      gfx11_push_sh_reg(reg, __value, type); \
+      sctx->tracked_regs.other_reg_saved_mask |= BITFIELD_BIT(reg_enum); \
+      sctx->tracked_regs.other_reg_value[reg_enum] = __value; \
+   } \
 } while (0)
 
-#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \
-   unsigned sh_offset = (sh_base) + (desc)->shader_userdata_offset; \
-   radeon_set_sh_reg_seq(sh_offset, 1); \
-   radeon_emit_32bit_pointer(sctx->screen, (desc)->gpu_address); \
+/* GFX11 packet building helpers for buffered SH registers. */
+#define radeon_push_gfx_sh_reg(reg, value) \
+   gfx11_push_sh_reg(reg, value, gfx)
+
+#define radeon_push_compute_sh_reg(reg, value) \
+   gfx11_push_sh_reg(reg, value, compute)
+
+#define radeon_opt_push_gfx_sh_reg(reg, reg_enum, value) \
+   gfx11_opt_push_sh_reg(reg, reg_enum, value, gfx)
+
+#define radeon_opt_push_compute_sh_reg(reg, reg_enum, value) \
+   gfx11_opt_push_sh_reg(reg, reg_enum, value, compute)
+
+#define radeon_set_or_push_gfx_sh_reg(reg, value) do { \
+   if (GFX_VERSION >= GFX11 && HAS_PAIRS) { \
+      radeon_push_gfx_sh_reg(reg, value); \
+   } else { \
+      radeon_set_sh_reg_seq(reg, 1); \
+      radeon_emit(value); \
+   } \
 } while (0)
 
 /* This should be evaluated at compile time if all parameters are constants. */
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 0a68d7efc8a..821d3a436bd 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -70,7 +70,7 @@ static void si_pc_emit_instance(struct si_context *sctx, int 
se, int instance)
 void si_pc_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders)
 {
    radeon_begin(cs);
-   radeon_set_uconfig_reg_seq(R_036780_SQ_PERFCOUNTER_CTRL, 2, false);
+   radeon_set_uconfig_reg_seq(R_036780_SQ_PERFCOUNTER_CTRL, 2);
    radeon_emit(shaders & 0x7f);
    radeon_emit(0xffffffff);
    radeon_end();
@@ -92,12 +92,12 @@ static void si_pc_emit_select(struct si_context *sctx, 
struct ac_pc_block *block
    radeon_begin(cs);
 
    for (idx = 0; idx < count; ++idx) {
-      radeon_set_uconfig_reg_seq(regs->select0[idx], 1, false);
+      radeon_set_uconfig_reg_seq(regs->select0[idx], 1);
       radeon_emit(selectors[idx] | regs->select_or);
    }
 
    for (idx = 0; idx < regs->num_spm_counters; idx++) {
-      radeon_set_uconfig_reg_seq(regs->select1[idx], 1, false);
+      radeon_set_uconfig_reg_seq(regs->select1[idx], 1);
       radeon_emit(0);
    }
 
@@ -748,7 +748,7 @@ si_emit_spm_counters(struct si_context *sctx, struct 
radeon_cmdbuf *cs)
          const struct ac_spm_counter_select *cntr_sel = 
&spm->sqg[instance].counters[b];
          uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
 
-         radeon_set_uconfig_reg_seq(reg_base + b * 4, 1, false);
+         radeon_set_uconfig_reg_seq(reg_base + b * 4, 1);
          radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* 
SQC_BANK_MASK only gfx10 */
       }
    }
@@ -768,10 +768,10 @@ si_emit_spm_counters(struct si_context *sctx, struct 
radeon_cmdbuf *cs)
             if (!cntr_sel->active)
                continue;
 
-            radeon_set_uconfig_reg_seq(regs->select0[c], 1, false);
+            radeon_set_uconfig_reg_seq(regs->select0[c], 1);
             radeon_emit(cntr_sel->sel0);
 
-            radeon_set_uconfig_reg_seq(regs->select1[c], 1, false);
+            radeon_set_uconfig_reg_seq(regs->select1[c], 1);
             radeon_emit(cntr_sel->sel1);
          }
       }
diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c 
b/src/gallium/drivers/radeonsi/si_sqtt.c
index 63618e666be..05891d98115 100644
--- a/src/gallium/drivers/radeonsi/si_sqtt.c
+++ b/src/gallium/drivers/radeonsi/si_sqtt.c
@@ -798,11 +798,7 @@ static void si_emit_sqtt_userdata(struct si_context *sctx,
   while (num_dwords > 0) {
     uint32_t count = MIN2(num_dwords, 2);
 
-    /* Without the perfctr bit the CP might not always pass the
-     * write on correctly. */
-    radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count,
-                               sctx->gfx_level >= GFX10);
-
+    radeon_set_uconfig_perfctr_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, 
count);
     radeon_emit_array(dwords, count);
 
     dwords += count;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp 
b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 8b3686a5694..d0f76a1c5a5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -975,14 +975,26 @@ static void si_emit_shader_gs(struct si_context *sctx, 
unsigned index)
    /* These don't cause any context rolls. */
    radeon_begin_again(&sctx->gfx_cs);
    if (sctx->gfx_level >= GFX7) {
-      radeon_opt_set_sh_reg_idx(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
-                                SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
-                                3, shader->gs.spi_shader_pgm_rsrc3_gs);
+      if (sctx->screen->info.uses_kernel_cu_mask) {
+         radeon_opt_set_sh_reg_idx(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                                   SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+                                   3, shader->gs.spi_shader_pgm_rsrc3_gs);
+      } else {
+         radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                               SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+                               shader->gs.spi_shader_pgm_rsrc3_gs);
+      }
    }
    if (sctx->gfx_level >= GFX10) {
-      radeon_opt_set_sh_reg_idx(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
-                                SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
-                                3, shader->gs.spi_shader_pgm_rsrc4_gs);
+      if (sctx->screen->info.uses_kernel_cu_mask) {
+         radeon_opt_set_sh_reg_idx(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+                                   SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+                                   3, shader->gs.spi_shader_pgm_rsrc4_gs);
+      } else {
+         radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+                               SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+                               shader->gs.spi_shader_pgm_rsrc4_gs);
+      }
    }
    radeon_end();
 }
@@ -1204,12 +1216,21 @@ static void gfx10_emit_shader_ngg(struct si_context 
*sctx, unsigned index)
                                  SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
                                  shader->gs.spi_shader_pgm_rsrc4_gs);
    } else {
-      radeon_opt_set_sh_reg_idx(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
-                                SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
-                                3, shader->ngg.spi_shader_pgm_rsrc3_gs);
-      radeon_opt_set_sh_reg_idx(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
-                                SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
-                                3, shader->ngg.spi_shader_pgm_rsrc4_gs);
+      if (sctx->screen->info.uses_kernel_cu_mask) {
+         radeon_opt_set_sh_reg_idx(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                                   SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+                                   3, shader->ngg.spi_shader_pgm_rsrc3_gs);
+         radeon_opt_set_sh_reg_idx(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+                                   SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+                                   3, shader->ngg.spi_shader_pgm_rsrc4_gs);
+      } else {
+         radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                               SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+                               shader->ngg.spi_shader_pgm_rsrc3_gs);
+         radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+                               SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+                               shader->ngg.spi_shader_pgm_rsrc4_gs);
+      }
    }
    radeon_end();
 }

Reply via email to