From: Marek Olšák <marek.ol...@amd.com>

This does change the behavior slightly:
  If a shader writes COLOR[i] and that color buffer isn't bound,
  the shader will export MRT_NULL instead and discard the IR tree that
  calculates the output. The only exception is alpha-to-coverage, which
  requires an alpha export.
---
 src/gallium/drivers/radeon/r600_pipe_common.h   |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h          |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c        | 35 ++++++++++++------
 src/gallium/drivers/radeonsi/si_shader.h        |  2 +-
 src/gallium/drivers/radeonsi/si_state.c         | 39 +++++++++++---------
 src/gallium/drivers/radeonsi/si_state.h         |  1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 47 ++++++++++++++++++++-----
 7 files changed, 90 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 27f6e98..f3271e2 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -252,6 +252,7 @@ struct r600_surface {
        unsigned cb_color_fmask_slice;  /* EG and later */
        unsigned cb_color_cmask;        /* CB_COLORn_TILE (r600 only) */
        unsigned cb_color_mask;         /* R600 only */
+       unsigned spi_shader_col_format; /* SI+ */
        unsigned sx_ps_downconvert;     /* Stoney only */
        unsigned sx_blend_opt_epsilon;  /* Stoney only */
        struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. 
R600 only */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index f83cb02..e2009de 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -125,7 +125,7 @@ struct si_framebuffer {
        unsigned                        log_samples;
        unsigned                        cb0_is_integer;
        unsigned                        compressed_cb_mask;
-       unsigned                        export_16bpc;
+       unsigned                        spi_shader_col_format;
        unsigned                        dirty_cbufs;
        bool                            dirty_zsbuf;
 };
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2de7def..266ef6d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1265,7 +1265,7 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
        struct lp_build_context *uint =
                                
&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
        struct lp_build_context *base = &bld_base->base;
-       unsigned compressed = 0;
+       unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
        unsigned chan;
 
        /* XXX: This controls which components of the output
@@ -1286,17 +1286,29 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
        args[3] = lp_build_const_int32(base->gallivm, target);
 
        if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+               unsigned col_formats =
+                       si_shader_ctx->shader->key.ps.spi_shader_col_format;
                int cbuf = target - V_008DFC_SQ_EXP_MRT;
 
-               if (cbuf >= 0 && cbuf < 8)
-                       compressed = 
(si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
+               assert(cbuf >= 0 && cbuf < 8);
+               spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
        }
 
-       /* Set COMPR flag */
-       args[4] = compressed ? uint->one : uint->zero;
+       args[4] = uint->zero; /* COMPR flag */
+       args[5] = base->undef;
+       args[6] = base->undef;
+       args[7] = base->undef;
+       args[8] = base->undef;
+
+       switch (spi_shader_col_format) {
+       case V_028714_SPI_SHADER_ZERO:
+               args[0] = uint->zero; /* writemask */
+               args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_NULL);
+               break;
+
+       case V_028714_SPI_SHADER_FP16_ABGR:
+               args[4] = uint->one; /* COMPR flag */
 
-       if (compressed) {
-               /* Pixel shader needs to pack output values before export */
                for (chan = 0; chan < 2; chan++) {
                        LLVMValueRef pack_args[2] = {
                                values[2 * chan],
@@ -1314,10 +1326,13 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
                                                 packed,
                                                 
LLVMFloatTypeInContext(base->gallivm->context),
                                                 "");
-                       args[chan + 7] = base->undef;
                }
-       } else
+               break;
+
+       case V_028714_SPI_SHADER_32_ABGR:
                memcpy(&args[5], values, sizeof(values[0]) * 4);
+               break;
+       }
 }
 
 static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
@@ -4031,7 +4046,7 @@ void si_dump_shader_key(unsigned shader, union 
si_shader_key *key, FILE *f)
                break;
 
        case PIPE_SHADER_FRAGMENT:
-               fprintf(f, "  export_16bpc = 0x%X\n", key->ps.export_16bpc);
+               fprintf(f, "  spi_shader_col_format = 0x%x\n", 
key->ps.spi_shader_col_format);
                fprintf(f, "  last_cbuf = %u\n", key->ps.last_cbuf);
                fprintf(f, "  color_two_side = %u\n", key->ps.color_two_side);
                fprintf(f, "  alpha_func = %u\n", key->ps.alpha_func);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 1635358..a9b76c6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -232,7 +232,7 @@ struct si_shader_selector {
 
 union si_shader_key {
        struct {
-               unsigned        export_16bpc:8;
+               unsigned        spi_shader_col_format;
                unsigned        last_cbuf:3;
                unsigned        color_two_side:1;
                unsigned        alpha_func:3;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index ea441ac..492d3f9 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -403,6 +403,7 @@ static void *si_create_blend_state_mode(struct pipe_context 
*ctx,
        if (!blend)
                return NULL;
 
+       blend->alpha_to_coverage = state->alpha_to_coverage;
        blend->alpha_to_one = state->alpha_to_one;
        blend->dual_src_blend = util_blend_state_is_dual(state, 0);
 
@@ -1883,6 +1884,21 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, 
unsigned level, bool sten
  * framebuffer handling
  */
 
+static void si_choose_spi_color_formats(struct r600_surface *surf,
+                                       unsigned format, unsigned swap,
+                                       unsigned ntype)
+{
+       unsigned max_comp_size = si_colorformat_max_comp_size(format);
+
+       surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
+
+       if (ntype == V_028C70_NUMBER_SRGB ||
+           ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) 
&&
+            max_comp_size <= 10) ||
+           (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16))
+               surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR;
+}
+
 static void si_initialize_color_surface(struct si_context *sctx,
                                        struct r600_surface *surf)
 {
@@ -1896,7 +1912,6 @@ static void si_initialize_color_surface(struct si_context 
*sctx,
        const struct util_format_description *desc;
        int i;
        unsigned blend_clamp = 0, blend_bypass = 0;
-       unsigned max_comp_size;
 
        /* Layered rendering doesn't work with LINEAR_GENERAL.
         * (LINEAR_ALIGNED and others work) */
@@ -2053,13 +2068,7 @@ static void si_initialize_color_surface(struct 
si_context *sctx,
        }
 
        /* Determine pixel shader export format */
-       max_comp_size = si_colorformat_max_comp_size(format);
-       if (ntype == V_028C70_NUMBER_SRGB ||
-           ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) 
&&
-            max_comp_size <= 10) ||
-           (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
-               surf->export_16bpc = true;
-       }
+       si_choose_spi_color_formats(surf, format, swap, ntype);
 
        if (sctx->b.family == CHIP_STONEY &&
            !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
@@ -2286,7 +2295,7 @@ static void si_set_framebuffer_state(struct pipe_context 
*ctx,
 
        util_copy_framebuffer_state(&sctx->framebuffer.state, state);
 
-       sctx->framebuffer.export_16bpc = 0;
+       sctx->framebuffer.spi_shader_col_format = 0;
        sctx->framebuffer.compressed_cb_mask = 0;
        sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
        sctx->framebuffer.log_samples = 
util_logbase2(sctx->framebuffer.nr_samples);
@@ -2307,9 +2316,8 @@ static void si_set_framebuffer_state(struct pipe_context 
*ctx,
                        si_initialize_color_surface(sctx, surf);
                }
 
-               if (surf->export_16bpc) {
-                       sctx->framebuffer.export_16bpc |= 1 << i;
-               }
+               sctx->framebuffer.spi_shader_col_format |=
+                       surf->spi_shader_col_format << (i * 4);
 
                if (rtex->fmask.size && rtex->cmask.size) {
                        sctx->framebuffer.compressed_cb_mask |= 1 << i;
@@ -2317,12 +2325,11 @@ static void si_set_framebuffer_state(struct 
pipe_context *ctx,
                r600_context_add_resource_size(ctx, surf->base.texture);
        }
        /* Set the 16BPC export for possible dual-src blending. */
-       if (i == 1 && surf && surf->export_16bpc) {
-               sctx->framebuffer.export_16bpc |= 1 << 1;
+       if (i == 1 && surf) {
+               sctx->framebuffer.spi_shader_col_format |=
+                       surf->spi_shader_col_format << (i * 4);
        }
 
-       assert(!(sctx->framebuffer.export_16bpc & ~0xff));
-
        if (state->zsbuf) {
                surf = (struct r600_surface*)state->zsbuf;
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index f5ca661..46ba3c4 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -39,6 +39,7 @@ struct si_shader;
 struct si_state_blend {
        struct si_pm4_state     pm4;
        uint32_t                cb_target_mask;
+       bool                    alpha_to_coverage;
        bool                    alpha_to_one;
        bool                    dual_src_blend;
 };
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8ff70b4..fa78179 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -382,13 +382,28 @@ static void si_shader_vs(struct si_shader *shader)
                si_set_tesseval_regs(shader, pm4);
 }
 
+static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
+{
+       unsigned value = shader->key.ps.spi_shader_col_format;
+       unsigned i, num_targets = (util_last_bit(value) + 3) / 4;
+
+       /* If the i-th target format is set, all previous target formats must
+        * be non-zero to avoid hangs.
+        */
+       for (i = 0; i < num_targets; i++)
+               if (!(value & (0xf << (i * 4))))
+                       value |= V_028714_SPI_SHADER_32_R << (i * 4);
+
+       return value;
+}
+
 static void si_shader_ps(struct si_shader *shader)
 {
        struct tgsi_shader_info *info = &shader->selector->info;
        struct si_pm4_state *pm4;
-       unsigned i, spi_ps_in_control;
-       unsigned spi_shader_col_format = 0, cb_shader_mask = 0;
-       unsigned colors_written, export_16bpc;
+       unsigned i, spi_ps_in_control, spi_shader_col_format;
+       unsigned cb_shader_mask = 0;
+       unsigned colors_written;
        unsigned num_sgprs, num_user_sgprs;
        unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
        uint64_t va;
@@ -425,7 +440,6 @@ static void si_shader_ps(struct si_shader *shader)
 
        /* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */
        colors_written = info->colors_written;
-       export_16bpc = shader->key.ps.export_16bpc;
 
        if (info->colors_written == 0x1 &&
            info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
@@ -434,13 +448,20 @@ static void si_shader_ps(struct si_shader *shader)
 
        while (colors_written) {
                i = u_bit_scan(&colors_written);
-               if (export_16bpc & (1 << i))
-                       spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR 
<< (4 * i);
-               else
-                       spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << 
(4 * i);
                cb_shader_mask |= 0xf << (4 * i);
        }
 
+       spi_shader_col_format = si_get_spi_shader_col_format(shader);
+
+       /* This must be non-zero for alpha-test/kill to work.
+        * The hardware probably disables the pixel shader if no export
+        * memory is allocated.
+        */
+       if (!spi_shader_col_format &&
+           (shader->selector->info.uses_kill ||
+            shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
+               spi_shader_col_format = V_028714_SPI_SHADER_32_R;
+
        /* Set interpolation controls. */
        has_centroid = 
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
                       
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
@@ -571,12 +592,20 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
                break;
        case PIPE_SHADER_FRAGMENT: {
                struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+               struct si_state_blend *blend = sctx->queued.named.blend;
 
                if 
(sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
                    sel->info.colors_written == 0x1)
                        key->ps.last_cbuf = 
MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
 
-               key->ps.export_16bpc = sctx->framebuffer.export_16bpc;
+               key->ps.spi_shader_col_format = 
sctx->framebuffer.spi_shader_col_format;
+
+               /* If alpha-to-coverage is enabled, we have to export alpha
+                * even if there is no color buffer.
+                */
+               if (!(key->ps.spi_shader_col_format & 0xf) &&
+                   blend && blend->alpha_to_coverage)
+                       key->ps.spi_shader_col_format |= 
V_028710_SPI_SHADER_FP16_ABGR;
 
                if (rs) {
                        bool is_poly = (sctx->current_rast_prim >= 
PIPE_PRIM_TRIANGLES &&
-- 
2.1.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to