r-b
On Tue, Jul 16, 2019 at 7:28 AM Dave Airlie <airl...@gmail.com> wrote: > > From: Dave Airlie <airl...@redhat.com> > > This is ported from AMDVLK, it's probably not required unless > we want to use "real time queues", but it might be nice to just have it > in place. > --- > src/amd/common/sid.h | 1 + > src/amd/vulkan/radv_cs.h | 18 +++++++++++++++ > src/amd/vulkan/si_cmd_buffer.c | 42 +++++++++++++++++++--------------- > 3 files changed, 42 insertions(+), 19 deletions(-) > > diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h > index d464b6a110e..0b996e54884 100644 > --- a/src/amd/common/sid.h > +++ b/src/amd/common/sid.h > @@ -196,6 +196,7 @@ > #define PKT3_INCREMENT_CE_COUNTER 0x84 > #define PKT3_INCREMENT_DE_COUNTER 0x85 > #define PKT3_WAIT_ON_CE_COUNTER 0x86 > +#define PKT3_SET_SH_REG_INDEX 0x9B > #define PKT3_LOAD_CONTEXT_REG 0x9F /* new for VI */ > > #define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) > diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h > index eb1aedb0327..d21acba7e8e 100644 > --- a/src/amd/vulkan/radv_cs.h > +++ b/src/amd/vulkan/radv_cs.h > @@ -97,6 +97,24 @@ static inline void radeon_set_sh_reg(struct radeon_cmdbuf > *cs, unsigned reg, uns > radeon_emit(cs, value); > } > > +static inline void radeon_set_sh_reg_idx(const struct radv_physical_device > *pdevice, > + struct radeon_cmdbuf *cs, > + unsigned reg, unsigned idx, > + unsigned value) > +{ > + assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); > + assert(cs->cdw + 3 <= cs->max_dw); > + assert(idx); > + > + unsigned opcode = PKT3_SET_SH_REG_INDEX; > + if (pdevice->rad_info.chip_class < GFX10) > + opcode = PKT3_SET_SH_REG; > + > + radeon_emit(cs, PKT3(opcode, 1, 0)); > + radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28)); > + radeon_emit(cs, value); > +} > + > static inline void radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, > unsigned reg, unsigned num) > { > assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); > diff --git 
a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c > index a832dbd89eb..f789cdd1ce6 100644 > --- a/src/amd/vulkan/si_cmd_buffer.c > +++ b/src/amd/vulkan/si_cmd_buffer.c > @@ -262,20 +262,24 @@ si_emit_graphics(struct radv_physical_device > *physical_device, > if (physical_device->rad_info.chip_class >= GFX7) { > if (physical_device->rad_info.chip_class >= GFX10) { > /* Logical CUs 16 - 31 */ > - radeon_set_sh_reg(cs, > R_00B404_SPI_SHADER_PGM_RSRC4_HS, > - S_00B404_CU_EN(0xffff)); > - radeon_set_sh_reg(cs, > R_00B204_SPI_SHADER_PGM_RSRC4_GS, > - S_00B204_CU_EN(0xffff) | > - > S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0)); > - radeon_set_sh_reg(cs, > R_00B104_SPI_SHADER_PGM_RSRC4_VS, > - S_00B104_CU_EN(0xffff)); > - radeon_set_sh_reg(cs, > R_00B004_SPI_SHADER_PGM_RSRC4_PS, > - S_00B004_CU_EN(0xffff)); > + radeon_set_sh_reg_idx(physical_device, > + cs, > R_00B404_SPI_SHADER_PGM_RSRC4_HS, > + 3, S_00B404_CU_EN(0xffff)); > + radeon_set_sh_reg_idx(physical_device, > + cs, > R_00B204_SPI_SHADER_PGM_RSRC4_GS, > + 3, S_00B204_CU_EN(0xffff) | > + > S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0)); > + radeon_set_sh_reg_idx(physical_device, > + cs, > R_00B104_SPI_SHADER_PGM_RSRC4_VS, > + 3, S_00B104_CU_EN(0xffff)); > + radeon_set_sh_reg_idx(physical_device, > + cs, > R_00B004_SPI_SHADER_PGM_RSRC4_PS, > + 3, S_00B004_CU_EN(0xffff)); > } > > if (physical_device->rad_info.chip_class >= GFX9) { > - radeon_set_sh_reg(cs, > R_00B41C_SPI_SHADER_PGM_RSRC3_HS, > - S_00B41C_CU_EN(0xffff) | > S_00B41C_WAVE_LIMIT(0x3F)); > + radeon_set_sh_reg_idx(physical_device, cs, > R_00B41C_SPI_SHADER_PGM_RSRC3_HS, > + 3, S_00B41C_CU_EN(0xffff) | > S_00B41C_WAVE_LIMIT(0x3F)); > } else { > radeon_set_sh_reg(cs, > R_00B51C_SPI_SHADER_PGM_RSRC3_LS, > S_00B51C_CU_EN(0xffff) | > S_00B51C_WAVE_LIMIT(0x3F)); > @@ -291,8 +295,8 @@ si_emit_graphics(struct radv_physical_device > *physical_device, > > S_028A44_ES_VERTS_PER_SUBGRP(64) | > > S_028A44_GS_PRIMS_PER_SUBGRP(4)); > } > - radeon_set_sh_reg(cs, 
R_00B21C_SPI_SHADER_PGM_RSRC3_GS, > - S_00B21C_CU_EN(0xffff) | > S_00B21C_WAVE_LIMIT(0x3F)); > + radeon_set_sh_reg_idx(physical_device, cs, > R_00B21C_SPI_SHADER_PGM_RSRC3_GS, > + 3, S_00B21C_CU_EN(0xffff) | > S_00B21C_WAVE_LIMIT(0x3F)); > > if (physical_device->rad_info.num_good_cu_per_sh <= 4) { > /* Too few available compute units per SH. Disallowing > @@ -301,8 +305,8 @@ si_emit_graphics(struct radv_physical_device > *physical_device, > * > * LATE_ALLOC_VS = 2 is the highest safe number. > */ > - radeon_set_sh_reg(cs, > R_00B118_SPI_SHADER_PGM_RSRC3_VS, > - S_00B118_CU_EN(0xffff) | > S_00B118_WAVE_LIMIT(0x3F) ); > + radeon_set_sh_reg_idx(physical_device, cs, > R_00B118_SPI_SHADER_PGM_RSRC3_VS, > + 3, S_00B118_CU_EN(0xffff) | > S_00B118_WAVE_LIMIT(0x3F) ); > radeon_set_sh_reg(cs, > R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); > } else { > /* Set LATE_ALLOC_VS == 31. It should be less than > @@ -310,13 +314,13 @@ si_emit_graphics(struct radv_physical_device > *physical_device, > * - VS can't execute on CU0. > * - If HS writes outputs to LDS, LS can't execute on > CU0. 
> */ > - radeon_set_sh_reg(cs, > R_00B118_SPI_SHADER_PGM_RSRC3_VS, > - S_00B118_CU_EN(0xfffe) | > S_00B118_WAVE_LIMIT(0x3F)); > + radeon_set_sh_reg_idx(physical_device, cs, > R_00B118_SPI_SHADER_PGM_RSRC3_VS, > + 3, S_00B118_CU_EN(0xfffe) | > S_00B118_WAVE_LIMIT(0x3F)); > radeon_set_sh_reg(cs, > R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); > } > > - radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, > - S_00B01C_CU_EN(0xffff) | > S_00B01C_WAVE_LIMIT(0x3F)); > + radeon_set_sh_reg_idx(physical_device, cs, > R_00B01C_SPI_SHADER_PGM_RSRC3_PS, > + 3, S_00B01C_CU_EN(0xffff) | > S_00B01C_WAVE_LIMIT(0x3F)); > } > > if (physical_device->rad_info.chip_class >= GFX10) { > -- > 2.21.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev