On 04/07/2018 01:48 PM, Bas Nieuwenhuizen wrote:
According to Marek, not enabling it on Stoney has a significant
negative performance impact. (And I guess this might impact
performance on Raven as well)

The register settings are pretty much copied from radeonsi. I did
not put this in the pipeline as that would make the pipeline more
dependent on the format which mean we would have to have more
pipelines for the meta shaders.
---
  src/amd/vulkan/radv_cmd_buffer.c | 140 +++++++++++++++++++++++++++++++++++++++
  src/amd/vulkan/radv_pipeline.c   |  13 ++--
  src/amd/vulkan/radv_private.h    |   4 ++
  src/amd/vulkan/si_cmd_buffer.c   |   7 ++
  4 files changed, 158 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index d9f12a351e..dba744f798 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -681,6 +681,142 @@ radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
        state->prefetch_L2_mask &= ~mask;
  }
+static void
+radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
+{
+       if (!cmd_buffer->device->physical_device->rbplus_allowed)
+               return;
+
+       struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+       struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+       const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+
+       unsigned sx_ps_downconvert = 0;
+       unsigned sx_blend_opt_epsilon = 0;
+       unsigned sx_blend_opt_control = 0;
+
+       for (unsigned i = 0; subpass->color_count; ++i) {
+               if (subpass->color_attachments[i].attachment == 
VK_ATTACHMENT_UNUSED)
+                       continue;
+
+               int idx = subpass->color_attachments[i].attachment;
+               struct radv_color_buffer_info *cb = 
&framebuffer->attachments[idx].cb;
+
+               unsigned format = G_028C70_FORMAT(cb->cb_color_info);
+               unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
+               uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) 
& 0xf;
+               uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) 
& 0xf;
+
+               bool has_alpha, has_rgb;
+
+               /* Set if RGB and A are present. */
+               has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
+
+               if (format == V_028C70_COLOR_8 ||
+                   format == V_028C70_COLOR_16 ||
+                   format == V_028C70_COLOR_32)
+                       has_rgb = !has_alpha;
+               else
+                       has_rgb = true;
+
+               /* Check the colormask and export format. */
+               if (!(colormask & 0x7))
+                       has_rgb = false;
+               if (!(colormask & 0x8))
+                       has_alpha = false;
+
+               if (spi_format == V_028714_SPI_SHADER_ZERO) {
+                       has_rgb = false;
+                       has_alpha = false;
+               }
+
+               /* Disable value checking for disabled channels. */
+               if (!has_rgb)
+                       sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) 
<< (i * 4);
+               if (!has_alpha)
+                       sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) 
<< (i * 4);
+
+               /* Enable down-conversion for 32bpp and smaller formats. */
+               switch (format) {
+               case V_028C70_COLOR_8:
+               case V_028C70_COLOR_8_8:
+               case V_028C70_COLOR_8_8_8_8:
+                       /* For 1 and 2-channel formats, use the superset 
thereof. */
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 
<< (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << 
(i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_5_6_5:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 
<< (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << 
(i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_1_5_5_5:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 
<< (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << 
(i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_4_4_4_4:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 
<< (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << 
(i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_32:
+                       if (swap == V_028C70_SWAP_STD &&
+                           spi_format == V_028714_SPI_SHADER_32_R)
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R 
<< (i * 4);
+                       else if (swap == V_028C70_SWAP_ALT_REV &&
+                                spi_format == V_028714_SPI_SHADER_32_AR)
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A 
<< (i * 4);
+                       break;
+
+               case V_028C70_COLOR_16:
+               case V_028C70_COLOR_16_16:
+                       /* For 1-channel formats, use the superset thereof. */
+                       if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+                               if (swap == V_028C70_SWAP_STD ||
+                                   swap == V_028C70_SWAP_STD_REV)
+                                       sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
+                               else
+                                       sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_10_11_11:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 
<< (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT 
<< (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_2_10_10_10:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 
<< (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT 
<< (i * 4);
+                       }
+                       break;
+               }
+       }
+
+       radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 
3);
+       radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
+       radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
+       radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
+}
+
  static void
  radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
  {
@@ -3004,6 +3140,10 @@ static void
  radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
                              const struct radv_draw_info *info)
  {
+       if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
+           cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
+               radv_emit_rbplus_state(cmd_buffer);
+
        if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
                radv_emit_graphics_pipeline(cmd_buffer);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 08abb9dbc4..6735b36846 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2543,17 +2543,15 @@ radv_pipeline_generate_blend_state(struct 
radeon_winsys_cs *cs,
radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8);
                radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8);
-
-               radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
-               radeon_emit(cs, 0);     /* R_028754_SX_PS_DOWNCONVERT */
-               radeon_emit(cs, 0);     /* R_028758_SX_BLEND_OPT_EPSILON */
-               radeon_emit(cs, 0);     /* R_02875C_SX_BLEND_OPT_CONTROL */
        }
radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
        radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, 
blend->cb_shader_mask);
+
+       pipeline->graphics.col_format = blend->spi_shader_col_format;
+       pipeline->graphics.cb_target_mask = blend->cb_target_mask;
  }
@@ -2993,6 +2991,9 @@ radv_compute_db_shader_control(const struct radv_device *device,
        else
                z_order = V_02880C_LATE_Z;
+ bool disable_rbplus = device->physical_device->has_rbplus &&
+                             !device->physical_device->rbplus_allowed;

We can rely on rbplus_allowed directly.

+
        return  S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
                
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
                S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
@@ -3001,7 +3002,7 @@ radv_compute_db_shader_control(const struct radv_device 
*device,
                S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
                S_02880C_EXEC_ON_HIER_FAIL(ps->info.info.ps.writes_memory) |
                S_02880C_EXEC_ON_NOOP(ps->info.info.ps.writes_memory) |
-               
S_02880C_DUAL_QUAD_DISABLE(!!device->physical_device->has_rbplus);
+               S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
  }
static void
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 9e655af844..b33a5887b1 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1238,6 +1238,10 @@ struct radv_pipeline {
                        bool can_use_guardband;
                        uint32_t needed_dynamic_state;
                        bool disable_out_of_order_rast_for_occlusion;
+
+                       /* Used for rbplus */
+                       uint32_t col_format;
+                       uint32_t cb_target_mask;
                } graphics;
        };
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index aed291be35..37855a714f 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -554,6 +554,13 @@ si_emit_config(struct radv_physical_device 
*physical_device,
                                       small_prim_filter_cntl);
        }
+ if (physical_device->has_rbplus && !physical_device->rbplus_allowed) {
+               radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
+               radeon_emit(cs, 0);     /* R_028754_SX_PS_DOWNCONVERT */
+               radeon_emit(cs, 0);     /* R_028758_SX_BLEND_OPT_EPSILON */
+               radeon_emit(cs, 0);     /* R_02875C_SX_BLEND_OPT_CONTROL */
+       }

This is actually useless because CLEAR_STATE initializes these registers correctly.

+
        si_emit_compute(physical_device, cs);
  }
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to