On 21.04.2016 13:51, Bas Nieuwenhuizen wrote:
We can use shaders from multiple contexts, and they were not
otherwise locked yet.

Ouch... I guess this is why compute scratch buffers used to be per-program?

I'm still trying to wrap my head around the possible code paths here... are you sure that nothing can go wrong with the radeon_add_to_buffer_list of the scratch buffer? I'm thinking a code sequence like

Context 1 checks that sctx(1)->scratch_buffer == shader->scratch_bo.
Context 2 allocates a new scratch buffer sctx(2)->scratch_buffer and uploads a modified shader. Context 1 adds the old sctx(1)->scratch_buffer to its buffer list and submits its CS. It now runs with the new shader, and may cause a VM fault.

More generally, what if two contexts use the same shader with different scratch buffers? Do we just ping-pong, re-uploading the shader each time?

Bonus question: Do we even need per-context scratch buffers? As long as we idle all shaders at CS flush, we should be fine with a per-device/per-screen scratch buffer.

(Curiously, there is an old "this is probably not needed anymore" comment on the PS_PARTIAL_FLUSH in si_context_gfx_flush, but this may be wrong: since shaders can write memory, and the kernel may want to swap buffers around, we have to be extra careful about this...)

This needs a bit more thought...

Cheers,
Nicolai


Signed-off-by: Bas Nieuwenhuizen <[email protected]>
---
  src/gallium/drivers/radeonsi/si_compute.c       | 20 ++++++++++++++++----
  src/gallium/drivers/radeonsi/si_state_shaders.c | 12 ++++++++++--
  2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 7e05be5..a99a985 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -37,6 +37,7 @@
  #define MAX_GLOBAL_BUFFERS 20

  struct si_compute {
+       pipe_mutex mutex;
        unsigned ir_type;
        unsigned local_size;
        unsigned private_size;
@@ -116,6 +117,7 @@ static void *si_create_compute_state(
                si_shader_binary_upload(sctx->screen, &program->shader);
        }

+       pipe_mutex_init(program->mutex);
        return program;
  }

@@ -196,15 +198,19 @@ static void si_initialize_compute(struct si_context *sctx)
  }

  static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
-                                            struct si_shader *shader,
+                                            struct si_compute *program,
                                              struct si_shader_config *config)
  {
+       struct si_shader *shader = &program->shader;
        uint64_t scratch_bo_size, scratch_needed;
        scratch_bo_size = 0;
        scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves;
        if (sctx->compute_scratch_buffer)
                scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;

+       if (!scratch_needed)
+               return true;
+
        if (scratch_bo_size < scratch_needed) {
                pipe_resource_reference(
                        (struct pipe_resource**)&sctx->compute_scratch_buffer,
@@ -218,18 +224,23 @@ static bool si_setup_compute_scratch_buffer(struct 
si_context *sctx,
                        return false;
        }

-       if (sctx->compute_scratch_buffer != shader->scratch_bo && 
scratch_needed) {
+       pipe_mutex_lock(program->mutex);
+
+       if (sctx->compute_scratch_buffer != shader->scratch_bo) {
                uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;

                si_shader_apply_scratch_relocs(sctx, shader, config, 
scratch_va);

-               if (si_shader_binary_upload(sctx->screen, shader))
+               if (si_shader_binary_upload(sctx->screen, shader)) {
+                       pipe_mutex_unlock(program->mutex);
                        return false;
+               }

                r600_resource_reference(&shader->scratch_bo,
                                        sctx->compute_scratch_buffer);
        }

+       pipe_mutex_unlock(program->mutex);
        return true;
  }

@@ -272,7 +283,7 @@ static bool si_switch_compute_shader(struct si_context 
*sctx,
                config->rsrc2 |=  S_00B84C_LDS_SIZE(lds_blocks);
        }

-       if (!si_setup_compute_scratch_buffer(sctx, shader, config))
+       if (!si_setup_compute_scratch_buffer(sctx, program, config))
                return false;

        if (shader->scratch_bo) {
@@ -503,6 +514,7 @@ static void si_delete_compute_state(struct pipe_context 
*ctx, void* state){
                sctx->cs_shader_state.emitted_program = NULL;

        si_shader_destroy(&program->shader);
+       pipe_mutex_destroy(program->mutex);
        FREE(program);
  }

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 49e688a..382481c 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1618,6 +1618,7 @@ static int si_update_scratch_buffer(struct si_context 
*sctx,
                                    struct si_shader *shader)
  {
        uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
+       struct si_shader_selector *sel = shader->selector;
        int r;

        if (!shader)
@@ -1627,10 +1628,14 @@ static int si_update_scratch_buffer(struct si_context 
*sctx,
        if (shader->config.scratch_bytes_per_wave == 0)
                return 0;

+       pipe_mutex_lock(sel->mutex);
+
        /* This shader is already configured to use the current
         * scratch buffer. */
-       if (shader->scratch_bo == sctx->scratch_buffer)
+       if (shader->scratch_bo == sctx->scratch_buffer) {
+               pipe_mutex_unlock(sel->mutex);
                return 0;
+       }

        assert(sctx->scratch_buffer);

@@ -1638,14 +1643,17 @@ static int si_update_scratch_buffer(struct si_context 
*sctx,

        /* Replace the shader bo with a new bo that has the relocs applied. */
        r = si_shader_binary_upload(sctx->screen, shader);
-       if (r)
+       if (r) {
+               pipe_mutex_unlock(sel->mutex);
                return r;
+       }

        /* Update the shader state to use the new shader bo. */
        si_shader_init_pm4_state(shader);

        r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);

+       pipe_mutex_unlock(sel->mutex);
        return 1;
  }


_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to