Module: Mesa
Branch: master
Commit: ed9463815644c85c124c72111d96e256db2986b4
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ed9463815644c85c124c72111d96e256db2986b4

Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
Date:   Mon Apr  9 11:23:21 2018 +0200

radv: Enable RB+ where possible.

According to Marek, not enabling it on Stoney has a significant
negative performance impact. (And I guess this might impact
performance on Raven as well)

The register settings are pretty much copied from radeonsi. I did
not put this in the pipeline as that would make the pipeline more
dependent on the format which mean we would have to have more
pipelines for the meta shaders.

v2: Don't clear RB+ regs if not enabled as the CLEAR_STATE packet
    does already.
Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>

---

 src/amd/vulkan/radv_cmd_buffer.c | 140 +++++++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_pipeline.c   |  13 ++--
 src/amd/vulkan/radv_private.h    |   4 ++
 3 files changed, 151 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 3b1d6aedc8..f73526b5fc 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -679,6 +679,142 @@ radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
 }
 
 static void
+radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
+{
+       if (!cmd_buffer->device->physical_device->rbplus_allowed)
+               return;
+
+       struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+       struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+       const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+
+       unsigned sx_ps_downconvert = 0;
+       unsigned sx_blend_opt_epsilon = 0;
+       unsigned sx_blend_opt_control = 0;
+
+       for (unsigned i = 0; i < subpass->color_count; ++i) {
+               if (subpass->color_attachments[i].attachment == 
VK_ATTACHMENT_UNUSED)
+                       continue;
+
+               int idx = subpass->color_attachments[i].attachment;
+               struct radv_color_buffer_info *cb = 
&framebuffer->attachments[idx].cb;
+
+               unsigned format = G_028C70_FORMAT(cb->cb_color_info);
+               unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
+               uint32_t spi_format = (pipeline->graphics.col_format >> (i * 
4)) & 0xf;
+               uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 
4)) & 0xf;
+
+               bool has_alpha, has_rgb;
+
+               /* Set if RGB and A are present. */
+               has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
+
+               if (format == V_028C70_COLOR_8 ||
+                   format == V_028C70_COLOR_16 ||
+                   format == V_028C70_COLOR_32)
+                       has_rgb = !has_alpha;
+               else
+                       has_rgb = true;
+
+               /* Check the colormask and export format. */
+               if (!(colormask & 0x7))
+                       has_rgb = false;
+               if (!(colormask & 0x8))
+                       has_alpha = false;
+
+               if (spi_format == V_028714_SPI_SHADER_ZERO) {
+                       has_rgb = false;
+                       has_alpha = false;
+               }
+
+               /* Disable value checking for disabled channels. */
+               if (!has_rgb)
+                       sx_blend_opt_control |= 
S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
+               if (!has_alpha)
+                       sx_blend_opt_control |= 
S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
+
+               /* Enable down-conversion for 32bpp and smaller formats. */
+               switch (format) {
+               case V_028C70_COLOR_8:
+               case V_028C70_COLOR_8_8:
+               case V_028C70_COLOR_8_8_8_8:
+                       /* For 1 and 2-channel formats, use the superset 
thereof. */
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+                               sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << 
(i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_5_6_5:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << 
(i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_1_5_5_5:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << 
(i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_4_4_4_4:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << 
(i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_32:
+                       if (swap == V_028C70_SWAP_STD &&
+                           spi_format == V_028714_SPI_SHADER_32_R)
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R 
<< (i * 4);
+                       else if (swap == V_028C70_SWAP_ALT_REV &&
+                                spi_format == V_028714_SPI_SHADER_32_AR)
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A 
<< (i * 4);
+                       break;
+
+               case V_028C70_COLOR_16:
+               case V_028C70_COLOR_16_16:
+                       /* For 1-channel formats, use the superset thereof. */
+                       if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+                               if (swap == V_028C70_SWAP_STD ||
+                                   swap == V_028C70_SWAP_STD_REV)
+                                       sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
+                               else
+                                       sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_10_11_11:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT 
<< (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_2_10_10_10:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT 
<< (i * 4);
+                       }
+                       break;
+               }
+       }
+
+       radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 
3);
+       radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
+       radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
+       radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
+}
+
+static void
 radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
 {
        struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
@@ -3005,6 +3141,10 @@ static void
 radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
                              const struct radv_draw_info *info)
 {
+       if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
+           cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
+               radv_emit_rbplus_state(cmd_buffer);
+
        if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
                radv_emit_graphics_pipeline(cmd_buffer);
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 08abb9dbc4..6735b36846 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2543,17 +2543,15 @@ radv_pipeline_generate_blend_state(struct 
radeon_winsys_cs *cs,
 
                radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8);
                radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8);
-
-               radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
-               radeon_emit(cs, 0);     /* R_028754_SX_PS_DOWNCONVERT */
-               radeon_emit(cs, 0);     /* R_028758_SX_BLEND_OPT_EPSILON */
-               radeon_emit(cs, 0);     /* R_02875C_SX_BLEND_OPT_CONTROL */
        }
 
        radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, 
blend->spi_shader_col_format);
 
        radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, 
blend->cb_target_mask);
        radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, 
blend->cb_shader_mask);
+
+       pipeline->graphics.col_format = blend->spi_shader_col_format;
+       pipeline->graphics.cb_target_mask = blend->cb_target_mask;
 }
 
 
@@ -2993,6 +2991,9 @@ radv_compute_db_shader_control(const struct radv_device 
*device,
        else
                z_order = V_02880C_LATE_Z;
 
+       bool disable_rbplus = device->physical_device->has_rbplus &&
+                             !device->physical_device->rbplus_allowed;
+
        return  S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
                
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
                S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
@@ -3001,7 +3002,7 @@ radv_compute_db_shader_control(const struct radv_device 
*device,
                S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
                S_02880C_EXEC_ON_HIER_FAIL(ps->info.info.ps.writes_memory) |
                S_02880C_EXEC_ON_NOOP(ps->info.info.ps.writes_memory) |
-               
S_02880C_DUAL_QUAD_DISABLE(!!device->physical_device->has_rbplus);
+               S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
 }
 
 static void
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 31748910ad..1bcc3a906e 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1238,6 +1238,10 @@ struct radv_pipeline {
                        bool can_use_guardband;
                        uint32_t needed_dynamic_state;
                        bool disable_out_of_order_rast_for_occlusion;
+
+                       /* Used for rbplus */
+                       uint32_t col_format;
+                       uint32_t cb_target_mask;
                } graphics;
        };
 

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to