From: Dave Airlie <airl...@redhat.com>

We just pass the compute block and grid sizes in from outside in a constant buffer.

On the shader side, each value is fetched the first time it is accessed and
the register holding it is reused for later accesses.

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/gallium/drivers/r600/evergreen_compute.c |  9 +++-
 src/gallium/drivers/r600/r600_pipe.h         |  3 ++
 src/gallium/drivers/r600/r600_shader.c       | 62 ++++++++++++++++++++++++++++
 src/gallium/drivers/r600/r600_state_common.c | 16 ++++++-
 4 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index cf86440..4c888a2 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -724,10 +724,17 @@ static void compute_emit_cs(struct r600_context *rctx,
                bool need_buf_const = current->shader.uses_tex_buffers ||
                        current->shader.has_txq_cube_array_z_comp;
 
+               for (int i = 0; i < 3; i++) {
+                       rctx->cs_block_grid_sizes[i] = info->block[i];
+                       rctx->cs_block_grid_sizes[i + 4] = info->grid[i];
+               }
+               rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0;
+               
rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
                if (need_buf_const) {
                        eg_setup_buffer_constants(rctx, PIPE_SHADER_COMPUTE);
-                       r600_update_driver_const_buffers(rctx, true);
                }
+               r600_update_driver_const_buffers(rctx, true);
+
                if (evergreen_emit_atomic_buffer_setup(rctx, current, 
combined_atomics, &atomic_used_mask)) {
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) 
| EVENT_INDEX(4));
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 65d1185..0f5dc6b 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -78,6 +78,7 @@
 /* start driver buffers after user buffers */
 #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 #define R600_UCP_SIZE (4*4*8)
+#define R600_CS_BLOCK_GRID_SIZE (8 * 4)
 #define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
 
 #define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
@@ -396,6 +397,7 @@ struct r600_shader_driver_constants_info {
        bool                            vs_ucp_dirty;
        bool                            texture_const_dirty;
        bool                            ps_sample_pos_dirty;
+       bool                            cs_block_grid_size_dirty;
 };
 
 struct r600_constbuf_state
@@ -575,6 +577,7 @@ struct r600_context {
        struct r600_isa         *isa;
        float sample_positions[4 * 16];
        float tess_state[8];
+       uint32_t cs_block_grid_sizes[8]; /* 3 for block + 1 pad, 3 for grid + 1 pad */
        bool tess_state_dirty;
        struct r600_pipe_shader_selector *last_ls;
        struct r600_pipe_shader_selector *last_tcs;
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index b3c29b9..ee6f613 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -346,6 +346,8 @@ struct r600_shader_ctx {
        boolean                 clip_vertex_write;
        unsigned                cv_output;
        unsigned                edgeflag_output;
+       int                                     cs_block_size_reg;
+       int                                     cs_grid_size_reg;
        int                                     fragcoord_input;
        int                                     native_integers;
        int                                     next_ring_offset;
@@ -1308,6 +1310,60 @@ static int load_sample_position(struct r600_shader_ctx 
*ctx, struct r600_shader_
        return t1;
 }
 
+static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
+{
+       struct r600_bytecode_vtx vtx;
+       int r, t1;
+       /* Return the GPR holding the block size (load_block) or grid size (!load_block), emitting the fetch only on first use. */
+       if (load_block && ctx->cs_block_size_reg != -1)
+               return ctx->cs_block_size_reg;
+       if (!load_block && ctx->cs_grid_size_reg != -1)
+               return ctx->cs_grid_size_reg;
+       t1 = r600_get_temp(ctx);
+       /* MOV V_SQ_ALU_SRC_0 (presumably the constant 0 - confirm) into channel 0 of t1; t1 is the fetch's source GPR below. */
+       struct r600_bytecode_alu alu;
+       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+       alu.op = ALU_OP1_MOV;
+       alu.src[0].sel = V_SQ_ALU_SRC_0;
+       alu.dst.sel = t1;
+       alu.dst.write = 1;
+       alu.last = 1;
+       r = r600_bytecode_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+       /* Fetch from R600_BUFFER_INFO_CONST_BUFFER into t1: x/y/z are selected, w uses selector 7 (presumably "masked" - confirm). */
+       memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+       vtx.op = FETCH_OP_VFETCH;
+       vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
+       vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+       vtx.src_gpr = t1;
+       vtx.src_sel_x = 0;
+
+       vtx.mega_fetch_count = 16;
+       vtx.dst_gpr = t1;
+       vtx.dst_sel_x = 0;
+       vtx.dst_sel_y = 1;
+       vtx.dst_sel_z = 2;
+       vtx.dst_sel_w = 7;
+       vtx.data_format = FMT_32_32_32_32;
+       vtx.num_format_all = 1;
+       vtx.format_comp_all = 0;
+       vtx.use_const_fields = 0;
+       vtx.offset = load_block ? 0 : 16; /* byte offset: block size at 0, grid size at 16, matching cs_block_grid_sizes[0..2] / [4..6] */
+       vtx.endian = r600_endian_swap(32);
+       vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+       r = r600_bytecode_add_vtx(ctx->bc, &vtx);
+       if (r)
+               return r;
+       /* NOTE(review): error codes share the int return with the register number; tgsi_src() stores the result in r600_src->sel unchecked - confirm. */
+       if (load_block)
+               ctx->cs_block_size_reg = t1;
+       else
+               ctx->cs_grid_size_reg = t1;
+       return t1;
+}
+
 static void tgsi_src(struct r600_shader_ctx *ctx,
                     const struct tgsi_full_src_register *tgsi_src,
                     struct r600_shader_src *r600_src)
@@ -1413,6 +1469,10 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
                        r600_src->swizzle[1] = 3;
                        r600_src->swizzle[2] = 3;
                        r600_src->swizzle[3] = 3;
+               } else if 
(ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == 
TGSI_SEMANTIC_GRID_SIZE) {
+                       r600_src->sel = load_block_grid_size(ctx, false);
+               } else if 
(ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == 
TGSI_SEMANTIC_BLOCK_SIZE) {
+                       r600_src->sel = load_block_grid_size(ctx, true);
                }
        } else {
                if (tgsi_src->Register.Indirect)
@@ -3139,6 +3199,8 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
        ctx.face_gpr = -1;
        ctx.fixed_pt_position_gpr = -1;
        ctx.fragcoord_input = -1;
+       ctx.cs_block_size_reg = -1;
+       ctx.cs_grid_size_reg = -1;
        ctx.colors_used = 0;
        ctx.clip_vertex_write = 0;
 
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index bddda6b..bd40774 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1230,7 +1230,8 @@ void r600_update_driver_const_buffers(struct r600_context 
*rctx, bool compute_on
                struct r600_shader_driver_constants_info *info = 
&rctx->driver_consts[sh];
                if (!info->vs_ucp_dirty &&
                    !info->texture_const_dirty &&
-                   !info->ps_sample_pos_dirty)
+                   !info->ps_sample_pos_dirty &&
+                   !info->cs_block_grid_size_dirty)
                        continue;
 
                ptr = info->constants;
@@ -1257,6 +1258,17 @@ void r600_update_driver_const_buffers(struct 
r600_context *rctx, bool compute_on
                        info->ps_sample_pos_dirty = false;
                }
 
+               if (info->cs_block_grid_size_dirty) {
+                       assert(sh == PIPE_SHADER_COMPUTE);
+                       if (!size) {
+                               ptr = rctx->cs_block_grid_sizes;
+                               size = R600_CS_BLOCK_GRID_SIZE;
+                       } else {
+                               memcpy(ptr, rctx->cs_block_grid_sizes, 
R600_CS_BLOCK_GRID_SIZE);
+                       }
+                       info->cs_block_grid_size_dirty = false;
+               }
+
                if (info->texture_const_dirty) {
                        assert (ptr);
                        assert (size);
@@ -1264,6 +1276,8 @@ void r600_update_driver_const_buffers(struct r600_context 
*rctx, bool compute_on
                                memcpy(ptr, rctx->clip_state.state.ucp, 
R600_UCP_SIZE);
                        if (sh == PIPE_SHADER_FRAGMENT)
                                memcpy(ptr, rctx->sample_positions, 
R600_UCP_SIZE);
+                       if (sh == PIPE_SHADER_COMPUTE)
+                               memcpy(ptr, rctx->cs_block_grid_sizes, 
R600_CS_BLOCK_GRID_SIZE);
                }
                info->texture_const_dirty = false;
 
-- 
2.9.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to