From: Nicolai Hähnle <[email protected]>

---
 src/gallium/drivers/radeonsi/si_descriptors.c |  61 +++++++++++++-
 src/gallium/drivers/radeonsi/si_pipe.h        |   1 +
 src/gallium/drivers/radeonsi/si_shader.c      |   5 +-
 src/gallium/drivers/radeonsi/si_shader.h      | 114 +++++++++++++-------------
 4 files changed, 123 insertions(+), 58 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index c7c30bf..72bd50f 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -746,6 +746,55 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
        buffers->desc.list_dirty = true;
 }
 
+/* SHADER BUFFERS */
+
+static void si_set_shader_buffers(struct pipe_context *ctx, unsigned shader,
+                                 unsigned start_slot, unsigned count,
+                                 struct pipe_shader_buffer *sbuffers)
+{
+       struct si_context *sctx = (struct si_context *)ctx;
+       struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
+       unsigned i;
+
+       assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
+
+       for (i = 0; i < count; ++i) {
+               struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
+               struct r600_resource *buf;
+               unsigned slot = start_slot + i;
+               uint32_t *desc = buffers->desc.list + slot * 4;
+               uint64_t va;
+
+               if (!sbuffer || !sbuffer->buffer) {
+                       pipe_resource_reference(&buffers->buffers[slot], NULL);
+                       memset(desc, 0, sizeof(uint32_t) * 4);
+                       buffers->desc.enabled_mask &= ~(1llu << slot);
+                       continue;
+               }
+
+               buf = (struct r600_resource *)sbuffer->buffer;
+               va = buf->gpu_address + sbuffer->buffer_offset;
+
+               desc[0] = va;
+               desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
+                         S_008F04_STRIDE(0);
+               desc[2] = sbuffer->buffer_size;
+               desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                         S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                         S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+               pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buf,
+                                     buffers->shader_usage, buffers->priority);
+               buffers->desc.enabled_mask |= 1llu << slot;
+       }
+
+       buffers->desc.list_dirty = true;
+}
+
 /* RING BUFFERS */
 
 void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
@@ -1072,10 +1121,12 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
                }
        }
 
-       /* Constant buffers. */
+       /* Constant and shader buffers. */
        for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
                si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
                                          buf, old_va);
+               si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
+                                         buf, old_va);
        }
 
        /* Texture buffers - update virtual addresses in sampler view descriptors. */
@@ -1255,6 +1306,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
                        si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
 
                si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
+               si_emit_shader_pointer(sctx, &sctx->shader_buffers[i].desc, base, false);
                si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
                si_emit_shader_pointer(sctx, &sctx->images[i].desc, base, false);
        }
@@ -1274,6 +1326,9 @@ void si_init_all_descriptors(struct si_context *sctx)
                si_init_buffer_resources(&sctx->rw_buffers[i],
                                         SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
                                         RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
+               si_init_buffer_resources(&sctx->shader_buffers[i],
+                                        SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
+                                        RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER);
 
                si_init_descriptors(&sctx->samplers[i].views.desc,
                                    SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
@@ -1291,6 +1346,7 @@ void si_init_all_descriptors(struct si_context *sctx)
        sctx->b.b.bind_sampler_states = si_bind_sampler_states;
        sctx->b.b.set_shader_images = si_set_shader_images;
        sctx->b.b.set_constant_buffer = si_set_constant_buffer;
+       sctx->b.b.set_shader_buffers = si_set_shader_buffers;
        sctx->b.b.set_sampler_views = si_set_sampler_views;
        sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
        sctx->b.invalidate_buffer = si_invalidate_buffer;
@@ -1313,6 +1369,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx)
        for (i = 0; i < SI_NUM_SHADERS; i++) {
                if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
                    !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
+                   !si_upload_descriptors(sctx, &sctx->shader_buffers[i].desc) ||
                    !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
                    !si_upload_descriptors(sctx, &sctx->images[i].desc))
                        return false;
@@ -1327,6 +1384,7 @@ void si_release_all_descriptors(struct si_context *sctx)
        for (i = 0; i < SI_NUM_SHADERS; i++) {
                si_release_buffer_resources(&sctx->const_buffers[i]);
                si_release_buffer_resources(&sctx->rw_buffers[i]);
+               si_release_buffer_resources(&sctx->shader_buffers[i]);
                si_release_sampler_views(&sctx->samplers[i].views);
                si_release_image_views(&sctx->images[i]);
        }
@@ -1340,6 +1398,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
        for (i = 0; i < SI_NUM_SHADERS; i++) {
                si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
                si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
+               si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
                si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
                si_image_views_begin_new_cs(sctx, &sctx->images[i]);
        }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 6d0d687..dfdb8bb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -256,6 +256,7 @@ struct si_context {
        struct si_descriptors           vertex_buffers;
        struct si_buffer_resources      const_buffers[SI_NUM_SHADERS];
        struct si_buffer_resources      rw_buffers[SI_NUM_SHADERS];
+       struct si_buffer_resources      shader_buffers[SI_NUM_SHADERS];
        struct si_textures_info         samplers[SI_NUM_SHADERS];
        struct si_images_info           images[SI_NUM_SHADERS];
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 1e4bf82..efc00f3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4408,7 +4408,8 @@ static void create_function(struct si_shader_context *ctx)
        params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
        params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
        params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
-       last_array_pointer = SI_PARAM_IMAGES;
+       params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
+       last_array_pointer = SI_PARAM_SHADER_BUFFERS;
 
        switch (ctx->type) {
        case TGSI_PROCESSOR_VERTEX:
@@ -5988,6 +5989,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen,
        params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
        params[SI_PARAM_SAMPLERS] = ctx.i64;
        params[SI_PARAM_IMAGES] = ctx.i64;
+       params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
        params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
        params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
        params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
@@ -6238,6 +6240,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
        params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
        params[SI_PARAM_SAMPLERS] = ctx.i64;
        params[SI_PARAM_IMAGES] = ctx.i64;
+       params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
        params[SI_PARAM_ALPHA_REF] = ctx.f32;
        last_array_pointer = -1;
        last_sgpr = SI_PARAM_ALPHA_REF;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 8059edf..013c8a2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -81,95 +81,97 @@ struct radeon_shader_reloc;
 #define SI_SGPR_CONST_BUFFERS  2
 #define SI_SGPR_SAMPLERS       4  /* images & sampler states interleaved */
 #define SI_SGPR_IMAGES         6
-#define SI_SGPR_VERTEX_BUFFERS 8  /* VS only */
-#define SI_SGPR_BASE_VERTEX    10 /* VS only */
-#define SI_SGPR_START_INSTANCE 11 /* VS only */
-#define SI_SGPR_VS_STATE_BITS  12 /* VS(VS) only */
-#define SI_SGPR_LS_OUT_LAYOUT  12 /* VS(LS) only */
-#define SI_SGPR_TCS_OUT_OFFSETS        8  /* TCS & TES only */
-#define SI_SGPR_TCS_OUT_LAYOUT 9  /* TCS & TES only */
-#define SI_SGPR_TCS_IN_LAYOUT  10 /* TCS only */
-#define SI_SGPR_ALPHA_REF      8  /* PS only */
-
-#define SI_VS_NUM_USER_SGPR    13 /* API VS */
-#define SI_ES_NUM_USER_SGPR    12 /* API VS */
-#define SI_LS_NUM_USER_SGPR    13 /* API VS */
-#define SI_TCS_NUM_USER_SGPR   11
-#define SI_TES_NUM_USER_SGPR   10
-#define SI_GS_NUM_USER_SGPR    8
+#define SI_SGPR_SHADER_BUFFERS 8
+#define SI_SGPR_VERTEX_BUFFERS 10  /* VS only */
+#define SI_SGPR_BASE_VERTEX    12 /* VS only */
+#define SI_SGPR_START_INSTANCE 13 /* VS only */
+#define SI_SGPR_VS_STATE_BITS  14 /* VS(VS) only */
+#define SI_SGPR_LS_OUT_LAYOUT  14 /* VS(LS) only */
+#define SI_SGPR_TCS_OUT_OFFSETS        10 /* TCS & TES only */
+#define SI_SGPR_TCS_OUT_LAYOUT 11 /* TCS & TES only */
+#define SI_SGPR_TCS_IN_LAYOUT  12 /* TCS only */
+#define SI_SGPR_ALPHA_REF      10 /* PS only */
+
+#define SI_VS_NUM_USER_SGPR    15 /* API VS */
+#define SI_ES_NUM_USER_SGPR    14 /* API VS */
+#define SI_LS_NUM_USER_SGPR    15 /* API VS */
+#define SI_TCS_NUM_USER_SGPR   13
+#define SI_TES_NUM_USER_SGPR   12
+#define SI_GS_NUM_USER_SGPR    10
 #define SI_GSCOPY_NUM_USER_SGPR        4
-#define SI_PS_NUM_USER_SGPR    9
+#define SI_PS_NUM_USER_SGPR    11
 
 /* LLVM function parameter indices */
 #define SI_PARAM_RW_BUFFERS    0
 #define SI_PARAM_CONST_BUFFERS 1
 #define SI_PARAM_SAMPLERS      2
 #define SI_PARAM_IMAGES                3
+#define SI_PARAM_SHADER_BUFFERS        4
 
 /* VS only parameters */
-#define SI_PARAM_VERTEX_BUFFERS        4
-#define SI_PARAM_BASE_VERTEX   5
-#define SI_PARAM_START_INSTANCE        6
+#define SI_PARAM_VERTEX_BUFFERS        5
+#define SI_PARAM_BASE_VERTEX   6
+#define SI_PARAM_START_INSTANCE        7
 /* [0] = clamp vertex color */
-#define SI_PARAM_VS_STATE_BITS 7
+#define SI_PARAM_VS_STATE_BITS 8
 /* the other VS parameters are assigned dynamically */
 
 /* Offsets where TCS outputs and TCS patch outputs live in LDS:
  *   [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
  *   [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32
  */
-#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
+#define SI_PARAM_TCS_OUT_OFFSETS 5 /* for TCS & TES */
 
 /* Layout of TCS outputs / TES inputs:
  *   [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
  *   [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
  *   [26:31] = gl_PatchVerticesIn, max = 32
  */
-#define SI_PARAM_TCS_OUT_LAYOUT        5 /* for TCS & TES */
+#define SI_PARAM_TCS_OUT_LAYOUT        6 /* for TCS & TES */
 
 /* Layout of LS outputs / TCS inputs
  *   [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
  *   [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
  */
-#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */
-#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */
+#define SI_PARAM_TCS_IN_LAYOUT 7 /* TCS only */
+#define SI_PARAM_LS_OUT_LAYOUT 8 /* same value as TCS_IN_LAYOUT, LS only */
 
 /* TCS only parameters. */
-#define SI_PARAM_TESS_FACTOR_OFFSET 7
-#define SI_PARAM_PATCH_ID      8
-#define SI_PARAM_REL_IDS       9
+#define SI_PARAM_TESS_FACTOR_OFFSET 8
+#define SI_PARAM_PATCH_ID      9
+#define SI_PARAM_REL_IDS       10
 
 /* GS only parameters */
-#define SI_PARAM_GS2VS_OFFSET  4
-#define SI_PARAM_GS_WAVE_ID    5
-#define SI_PARAM_VTX0_OFFSET   6
-#define SI_PARAM_VTX1_OFFSET   7
-#define SI_PARAM_PRIMITIVE_ID  8
-#define SI_PARAM_VTX2_OFFSET   9
-#define SI_PARAM_VTX3_OFFSET   10
-#define SI_PARAM_VTX4_OFFSET   11
-#define SI_PARAM_VTX5_OFFSET   12
-#define SI_PARAM_GS_INSTANCE_ID        13
+#define SI_PARAM_GS2VS_OFFSET  5
+#define SI_PARAM_GS_WAVE_ID    6
+#define SI_PARAM_VTX0_OFFSET   7
+#define SI_PARAM_VTX1_OFFSET   8
+#define SI_PARAM_PRIMITIVE_ID  9
+#define SI_PARAM_VTX2_OFFSET   10
+#define SI_PARAM_VTX3_OFFSET   11
+#define SI_PARAM_VTX4_OFFSET   12
+#define SI_PARAM_VTX5_OFFSET   13
+#define SI_PARAM_GS_INSTANCE_ID        14
 
 /* PS only parameters */
-#define SI_PARAM_ALPHA_REF             4
-#define SI_PARAM_PRIM_MASK             5
-#define SI_PARAM_PERSP_SAMPLE          6
-#define SI_PARAM_PERSP_CENTER          7
-#define SI_PARAM_PERSP_CENTROID                8
-#define SI_PARAM_PERSP_PULL_MODEL      9
-#define SI_PARAM_LINEAR_SAMPLE         10
-#define SI_PARAM_LINEAR_CENTER         11
-#define SI_PARAM_LINEAR_CENTROID       12
-#define SI_PARAM_LINE_STIPPLE_TEX      13
-#define SI_PARAM_POS_X_FLOAT           14
-#define SI_PARAM_POS_Y_FLOAT           15
-#define SI_PARAM_POS_Z_FLOAT           16
-#define SI_PARAM_POS_W_FLOAT           17
-#define SI_PARAM_FRONT_FACE            18
-#define SI_PARAM_ANCILLARY             19
-#define SI_PARAM_SAMPLE_COVERAGE       20
-#define SI_PARAM_POS_FIXED_PT          21
+#define SI_PARAM_ALPHA_REF             5
+#define SI_PARAM_PRIM_MASK             6
+#define SI_PARAM_PERSP_SAMPLE          7
+#define SI_PARAM_PERSP_CENTER          8
+#define SI_PARAM_PERSP_CENTROID                9
+#define SI_PARAM_PERSP_PULL_MODEL      10
+#define SI_PARAM_LINEAR_SAMPLE         11
+#define SI_PARAM_LINEAR_CENTER         12
+#define SI_PARAM_LINEAR_CENTROID       13
+#define SI_PARAM_LINE_STIPPLE_TEX      14
+#define SI_PARAM_POS_X_FLOAT           15
+#define SI_PARAM_POS_Y_FLOAT           16
+#define SI_PARAM_POS_Z_FLOAT           17
+#define SI_PARAM_POS_W_FLOAT           18
+#define SI_PARAM_FRONT_FACE            19
+#define SI_PARAM_ANCILLARY             20
+#define SI_PARAM_SAMPLE_COVERAGE       21
+#define SI_PARAM_POS_FIXED_PT          22
 
 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
 
-- 
2.5.0

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to