r-b for the series
On Wed, Oct 11, 2017 at 7:15 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > Based on RadeonSI. > > This improves some Vulkan demos by +1% to +3%. > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/amd/vulkan/si_cmd_buffer.c | 99 > +++++++++++++++++++++++++----------------- > 1 file changed, 58 insertions(+), 41 deletions(-) > > diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c > index 626b68ad8e..1e8b43d4fa 100644 > --- a/src/amd/vulkan/si_cmd_buffer.c > +++ b/src/amd/vulkan/si_cmd_buffer.c > @@ -332,15 +332,25 @@ si_emit_config(struct radv_physical_device > *physical_device, > { > int i; > > + /* Only SI can disable CLEAR_STATE for now. */ > + assert(physical_device->has_clear_state || > + physical_device->rad_info.chip_class == SI); > + > radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); > radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1)); > radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1)); > > + if (physical_device->has_clear_state) { > + radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 1, 0)); > + radeon_emit(cs, 0); > + } > + > if (physical_device->rad_info.chip_class <= VI) > si_set_raster_config(physical_device, cs); > > radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); > - radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); > + if (!physical_device->has_clear_state) > + radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, > fui(0)); > > /* FIXME calculate these values somehow ??? */ > if (physical_device->rad_info.chip_class <= VI) { > @@ -348,16 +358,15 @@ si_emit_config(struct radv_physical_device > *physical_device, > radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40); > } > > - radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2); > - > - radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); > - radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, > 0); > + if (!physical_device->has_clear_state) { > + radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2); > + radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, > 0x0); > + radeon_set_context_reg(cs, > R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); > + } > > - radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); > radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); > - if (physical_device->rad_info.chip_class >= GFX9) > - radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 0); > - radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0); > + if (!physical_device->has_clear_state) > + radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0); > if (physical_device->rad_info.chip_class < CIK) > radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, > S_008A14_NUM_CLIP_SEQ(3) | > S_008A14_CLIP_VTX_REORDER_ENA(1)); > @@ -365,30 +374,43 @@ si_emit_config(struct radv_physical_device > *physical_device, > radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, > 0x76543210); > radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, > 0xfedcba98); > > - radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); > + if (!physical_device->has_clear_state) > + radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, > 0); > > - for (i = 0; i < 16; i++) { > - radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, > 0); > - radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, > fui(1.0)); > + /* CLEAR_STATE doesn't clear these correctly on certain generations. > + * I don't know why. Deduced by trial and error. > + */ > + if (physical_device->rad_info.chip_class <= CIK) { > + radeon_set_context_reg(cs, > R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); > + radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, > + S_028204_WINDOW_OFFSET_DISABLE(1)); > + radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, > + S_028240_WINDOW_OFFSET_DISABLE(1)); > + radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR, > + S_028244_BR_X(16384) | > S_028244_BR_Y(16384)); > + radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, > 0); > + radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR, > + S_028034_BR_X(16384) | > S_028034_BR_Y(16384)); > + } > + > + if (!physical_device->has_clear_state) { > + for (i = 0; i < 16; i++) { > + radeon_set_context_reg(cs, > R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0); > + radeon_set_context_reg(cs, > R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0)); > + } > + } > + > + if (!physical_device->has_clear_state) { > + radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, > 0xFFFF); > + radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, > 0xAAAAAAAA); > + /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI > */ > + radeon_set_context_reg(cs, > R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); > + radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0); > + radeon_set_context_reg(cs, > R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); > + radeon_set_context_reg(cs, > R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); > + radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0); > } > > - radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, > S_028204_WINDOW_OFFSET_DISABLE(1)); > - radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, > S_028240_WINDOW_OFFSET_DISABLE(1)); > - radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR, > - S_028244_BR_X(16384) | S_028244_BR_Y(16384)); > - radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); > - radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR, > - S_028034_BR_X(16384) | S_028034_BR_Y(16384)); > - > - radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); > - radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); > - /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ > - radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); > - radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0); > - > - radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); > - radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); > - radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0); > radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, > > S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | > > S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); > @@ -398,6 +420,11 @@ si_emit_config(struct radv_physical_device > *physical_device, > radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0); > radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0); > } else { > + /* These registers, when written, also overwrite the > + * CLEAR_STATE context, so we can't rely on CLEAR_STATE > setting > + * them. It would be an issue if there was another UMD > + * changing them. > + */ > radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0); > radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0); > radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0); > @@ -448,9 +475,6 @@ si_emit_config(struct radv_physical_device > *physical_device, > radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL, > > S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | > > S_028424_OVERWRITE_COMBINER_WATERMARK(4)); > - if (physical_device->rad_info.family < CHIP_POLARIS10) > - radeon_set_context_reg(cs, > R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); > - radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); > > vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | > S_028B50_ACCUM_TRI(11) | > @@ -463,14 +487,11 @@ si_emit_config(struct radv_physical_device > *physical_device, > > radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, > vgt_tess_distribution); > - } else { > + } else if (!physical_device->has_clear_state) { > radeon_set_context_reg(cs, > R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); > radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); > } > > - if (physical_device->has_rbplus) > - radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0); > - > if (physical_device->rad_info.chip_class >= GFX9) { > unsigned num_se = physical_device->rad_info.max_se; > unsigned pc_lines = 0; > @@ -488,10 +509,6 @@ si_emit_config(struct radv_physical_device > *physical_device, > > radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, > > S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF)); > - radeon_set_context_reg(cs, R_028064_DB_RENDER_FILTER, 0); > - /* TODO: We can use this to disable RBs for rendering to > GART: */ > - radeon_set_context_reg(cs, > R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 0); > - radeon_set_context_reg(cs, > R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0); > /* TODO: Enable the binner: */ > radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, > > S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) | > -- > 2.14.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev