On 11.02.19 21:27, Marek Olšák wrote:
From: Marek Olšák <[email protected]>

initialize all non-compute context functions to NULL.
---
  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
  src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
  src/gallium/drivers/radeonsi/si_state.c       |  3 +-
  src/gallium/drivers/radeonsi/si_state.h       |  1 +
  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
  src/gallium/drivers/radeonsi/si_texture.c     |  3 +
  11 files changed, 130 insertions(+), 75 deletions(-)

[snip]
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 20767c806d2..98c4fabc741 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -381,61 +381,56 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
  {
        struct si_context *sctx = CALLOC_STRUCT(si_context);
        struct si_screen* sscreen = (struct si_screen *)screen;
        struct radeon_winsys *ws = sscreen->ws;
        int shader, i;
        bool stop_exec_on_failure = (flags & 
PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
if (!sctx)
                return NULL;
+ sctx->has_graphics = sscreen->info.chip_class >= CIK &&
+                            !(flags & PIPE_CONTEXT_COMPUTE_ONLY);

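The logic seems backwards here for SI: `chip_class >= CIK` is false on SI, so has_graphics ends up false there even when PIPE_CONTEXT_COMPUTE_ONLY is not set.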

Cheers,
Nicolai



+
        if (flags & PIPE_CONTEXT_DEBUG)
                sscreen->record_llvm_ir = true; /* racy but not critical */
sctx->b.screen = screen; /* this must be set first */
        sctx->b.priv = NULL;
        sctx->b.destroy = si_destroy_context;
-       sctx->b.emit_string_marker = si_emit_string_marker;
-       sctx->b.set_debug_callback = si_set_debug_callback;
-       sctx->b.set_log_context = si_set_log_context;
-       sctx->b.set_context_param = si_set_context_param;
        sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
        sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
        slab_create_child(&sctx->pool_transfers_unsync, 
&sscreen->pool_transfers);
sctx->ws = sscreen->ws;
        sctx->family = sscreen->info.family;
        sctx->chip_class = sscreen->info.chip_class;
if (sscreen->info.has_gpu_reset_counter_query) {
                sctx->gpu_reset_counter =
                        sctx->ws->query_value(sctx->ws, 
RADEON_GPU_RESET_COUNTER);
        }
- sctx->b.get_device_reset_status = si_get_reset_status;
-       sctx->b.set_device_reset_callback = si_set_device_reset_callback;
-
-       si_init_context_texture_functions(sctx);
-       si_init_query_functions(sctx);
if (sctx->chip_class == CIK ||
            sctx->chip_class == VI ||
            sctx->chip_class == GFX9) {
                sctx->eop_bug_scratch = si_resource(
                        pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
                                           16 * 
sscreen->info.num_render_backends));
                if (!sctx->eop_bug_scratch)
                        goto fail;
        }
+ /* Initialize context allocators. */
        sctx->allocator_zeroed_memory =
                u_suballocator_create(&sctx->b, 128 * 1024,
                                      0, PIPE_USAGE_DEFAULT,
                                      SI_RESOURCE_FLAG_UNMAPPABLE |
                                      SI_RESOURCE_FLAG_CLEAR, false);
        if (!sctx->allocator_zeroed_memory)
                goto fail;
sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
                                                    0, PIPE_USAGE_STREAM,
@@ -459,38 +454,22 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
        sctx->ctx = sctx->ws->ctx_create(sctx->ws);
        if (!sctx->ctx)
                goto fail;
if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
                sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
                                                   (void*)si_flush_dma_cs,
                                                   sctx, stop_exec_on_failure);
        }
- si_init_buffer_functions(sctx);
-       si_init_clear_functions(sctx);
-       si_init_blit_functions(sctx);
-       si_init_compute_functions(sctx);
-       si_init_compute_blit_functions(sctx);
-       si_init_debug_functions(sctx);
-       si_init_msaa_functions(sctx);
-       si_init_streamout_functions(sctx);
-
-       if (sscreen->info.has_hw_decode) {
-               sctx->b.create_video_codec = si_uvd_create_decoder;
-               sctx->b.create_video_buffer = si_video_buffer_create;
-       } else {
-               sctx->b.create_video_codec = vl_create_decoder;
-               sctx->b.create_video_buffer = vl_video_buffer_create;
-       }
-
-       sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
+       sctx->gfx_cs = ws->cs_create(sctx->ctx,
+                                    sctx->has_graphics ? RING_GFX : 
RING_COMPUTE,
                                     (void*)si_flush_gfx_cs, sctx, 
stop_exec_on_failure);
/* Border colors. */
        sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
                                          sizeof(*sctx->border_color_table));
        if (!sctx->border_color_table)
                goto fail;
sctx->border_color_buffer = si_resource(
                pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
@@ -498,43 +477,76 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
                                   sizeof(*sctx->border_color_table)));
        if (!sctx->border_color_buffer)
                goto fail;
sctx->border_color_map =
                ws->buffer_map(sctx->border_color_buffer->buf,
                               NULL, PIPE_TRANSFER_WRITE);
        if (!sctx->border_color_map)
                goto fail;
+ /* Initialize context functions used by graphics and compute. */
+       sctx->b.emit_string_marker = si_emit_string_marker;
+       sctx->b.set_debug_callback = si_set_debug_callback;
+       sctx->b.set_log_context = si_set_log_context;
+       sctx->b.set_context_param = si_set_context_param;
+       sctx->b.get_device_reset_status = si_get_reset_status;
+       sctx->b.set_device_reset_callback = si_set_device_reset_callback;
+       sctx->b.memory_barrier = si_memory_barrier;
+
        si_init_all_descriptors(sctx);
+       si_init_buffer_functions(sctx);
+       si_init_clear_functions(sctx);
+       si_init_blit_functions(sctx);
+       si_init_compute_functions(sctx);
+       si_init_compute_blit_functions(sctx);
+       si_init_debug_functions(sctx);
        si_init_fence_functions(sctx);
-       si_init_state_functions(sctx);
-       si_init_shader_functions(sctx);
-       si_init_viewport_functions(sctx);
-
-       if (sctx->chip_class >= CIK)
-               cik_init_sdma_functions(sctx);
-       else
-               si_init_dma_functions(sctx);
if (sscreen->debug_flags & DBG(FORCE_DMA))
                sctx->b.resource_copy_region = sctx->dma_copy;
- sctx->blitter = util_blitter_create(&sctx->b);
-       if (sctx->blitter == NULL)
-               goto fail;
-       sctx->blitter->skip_viewport_restore = true;
+       /* Initialize graphics-only context functions. */
+       if (sctx->has_graphics) {
+               si_init_context_texture_functions(sctx);
+               si_init_query_functions(sctx);
+               si_init_msaa_functions(sctx);
+               si_init_shader_functions(sctx);
+               si_init_state_functions(sctx);
+               si_init_streamout_functions(sctx);
+               si_init_viewport_functions(sctx);
+
+               sctx->blitter = util_blitter_create(&sctx->b);
+               if (sctx->blitter == NULL)
+                       goto fail;
+               sctx->blitter->skip_viewport_restore = true;
- si_init_draw_functions(sctx);
+               si_init_draw_functions(sctx);
+       }
+
+       /* Initialize SDMA functions. */
+       if (sctx->chip_class >= CIK)
+               cik_init_sdma_functions(sctx);
+       else
+               si_init_dma_functions(sctx);
        sctx->sample_mask = 0xffff;
+       /* Initialize multimedia functions. */
+       if (sscreen->info.has_hw_decode) {
+               sctx->b.create_video_codec = si_uvd_create_decoder;
+               sctx->b.create_video_buffer = si_video_buffer_create;
+       } else {
+               sctx->b.create_video_codec = vl_create_decoder;
+               sctx->b.create_video_buffer = vl_video_buffer_create;
+       }
+
        if (sctx->chip_class >= GFX9) {
                sctx->wait_mem_scratch = si_resource(
                        pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));
                if (!sctx->wait_mem_scratch)
                        goto fail;
/* Initialize the memory. */
                si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
                                 V_370_MEM, V_370_ME, &sctx->wait_mem_number);
        }
@@ -544,21 +556,22 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
        if (sctx->chip_class == CIK) {
                sctx->null_const_buf.buffer =
                        pipe_aligned_buffer_create(screen,
                                                   SI_RESOURCE_FLAG_32BIT,
                                                   PIPE_USAGE_DEFAULT, 16,
                                                   
sctx->screen->info.tcc_cache_line_size);
                if (!sctx->null_const_buf.buffer)
                        goto fail;
                sctx->null_const_buf.buffer_size = 
sctx->null_const_buf.buffer->width0;
- for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
+               unsigned start_shader = sctx->has_graphics ? 0 :  
PIPE_SHADER_COMPUTE;
+               for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {
                        for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
                                sctx->b.set_constant_buffer(&sctx->b, shader, i,
                                                              
&sctx->null_const_buf);
                        }
                }
si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
                                 &sctx->null_const_buf);
                si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
                                 &sctx->null_const_buf);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index b01d5744752..348e8e5bd26 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -777,21 +777,21 @@ struct si_saved_cs {
  };
struct si_context {
        struct pipe_context             b; /* base class */
enum radeon_family family;
        enum chip_class                 chip_class;
struct radeon_winsys *ws;
        struct radeon_winsys_ctx        *ctx;
-       struct radeon_cmdbuf            *gfx_cs;
+       struct radeon_cmdbuf            *gfx_cs; /* compute IB if graphics is 
disabled */
        struct radeon_cmdbuf            *dma_cs;
        struct pipe_fence_handle        *last_gfx_fence;
        struct pipe_fence_handle        *last_sdma_fence;
        struct si_resource              *eop_bug_scratch;
        struct u_upload_mgr             *cached_gtt_allocator;
        struct threaded_context         *tc;
        struct u_suballocator           *allocator_zeroed_memory;
        struct slab_child_pool          pool_transfers;
        struct slab_child_pool          pool_transfers_unsync; /* for 
threaded_context */
        struct pipe_device_reset_callback device_reset_callback;
@@ -815,20 +815,21 @@ struct si_context {
        void                            *cs_clear_render_target;
        void                            *cs_clear_render_target_1d_array;
        struct si_screen                *screen;
        struct pipe_debug_callback      debug;
        struct ac_llvm_compiler         compiler; /* only non-threaded 
compilation */
        struct si_shader_ctx_state      fixed_func_tcs_shader;
        struct si_resource              *wait_mem_scratch;
        unsigned                        wait_mem_number;
        uint16_t                        prefetch_L2_mask;
+ bool has_graphics;
        bool                            gfx_flush_in_progress:1;
        bool                            gfx_last_ib_is_busy:1;
        bool                            compute_is_busy:1;
unsigned num_gfx_cs_flushes;
        unsigned                        initial_gfx_cs_size;
        unsigned                        gpu_reset_counter;
        unsigned                        last_dirty_tex_counter;
        unsigned                        last_compressed_colortex_counter;
        unsigned                        last_num_draw_calls;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index b49a1b3695e..458b108a7e3 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct pipe_context 
*ctx, unsigned flags)
        si_update_fb_dirtiness_after_rendering(sctx);
        /* Multisample surfaces are flushed in si_decompress_textures. */
        if (sctx->framebuffer.uncompressed_cb_mask)
                si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
                                           
sctx->framebuffer.CB_has_shader_readable_metadata);
  }
/* This only ensures coherency for shader image/buffer stores. */
-static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
+void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
  {
        struct si_context *sctx = (struct si_context *)ctx;
/* Subsequent commands must wait for all shader invocations to
         * complete. */
        sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
                         SI_CONTEXT_CS_PARTIAL_FLUSH;
if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
                sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
@@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context *sctx)
        sctx->b.sampler_view_destroy = si_sampler_view_destroy;
        sctx->b.set_sample_mask = si_set_sample_mask;
        sctx->b.create_vertex_elements_state = si_create_vertex_elements;
        sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
        sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
        sctx->b.set_vertex_buffers = si_set_vertex_buffers;
sctx->b.texture_barrier = si_texture_barrier;
-       sctx->b.memory_barrier = si_memory_barrier;
        sctx->b.set_min_samples = si_set_min_samples;
        sctx->b.set_tess_state = si_set_tess_state;
        sctx->b.set_active_query_state = si_set_active_query_state;
        si_init_config(sctx);
  }
void si_init_screen_state_functions(struct si_screen *sscreen)
  {
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 767e789276a..6faa4c511b1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct 
si_context *sctx,
                                          struct si_shader_selector *sel);
  bool si_bindless_descriptor_can_reclaim_slab(void *priv,
                                             struct pb_slab_entry *entry);
  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
                                                  unsigned entry_size,
                                                  unsigned group_index);
  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);
  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
                      uint64_t old_va);
  /* si_state.c */
+void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
  void si_init_state_functions(struct si_context *sctx);
  void si_init_screen_state_functions(struct si_screen *sscreen);
  void
  si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,
                          enum pipe_format format,
                          unsigned offset, unsigned size,
                          uint32_t *state);
  void
  si_make_texture_descriptor(struct si_screen *screen,
                           struct si_texture *tex,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 9c968e39c2c..2a514f144b9 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct si_context *sctx,
                                        
S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
                }
        }
  }
static void si_emit_surface_sync(struct si_context *sctx,
                                 unsigned cp_coher_cntl)
  {
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
- if (sctx->chip_class >= GFX9) {
+       if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
                /* Flush caches and wait for the caches to assert idle. */
                radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
                radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
                radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
                radeon_emit(cs, 0xffffff);      /* CP_COHER_SIZE_HI */
                radeon_emit(cs, 0);             /* CP_COHER_BASE */
                radeon_emit(cs, 0);             /* CP_COHER_BASE_HI */
                radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
        } else {
                /* ACQUIRE_MEM is only required on a compute ring. */
@@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct si_context *sctx,
                radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
                radeon_emit(cs, 0);               /* CP_COHER_BASE */
                radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
        }
  }
void si_emit_cache_flush(struct si_context *sctx)
  {
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
        uint32_t flags = sctx->flags;
+
+       if (!sctx->has_graphics) {
+               /* Only process compute flags. */
+               flags &= SI_CONTEXT_INV_ICACHE |
+                        SI_CONTEXT_INV_SMEM_L1 |
+                        SI_CONTEXT_INV_VMEM_L1 |
+                        SI_CONTEXT_INV_GLOBAL_L2 |
+                        SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
+                        SI_CONTEXT_INV_L2_METADATA |
+                        SI_CONTEXT_CS_PARTIAL_FLUSH;
+       }
+
        uint32_t cp_coher_cntl = 0;
        uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
                                        SI_CONTEXT_FLUSH_AND_INV_DB);
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
                sctx->num_cb_cache_flushes++;
        if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
                sctx->num_db_cache_flushes++;
/* SI has a bug that it always flushes ICACHE and KCACHE if either
@@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context *sctx)
                                  EOP_DATA_SEL_VALUE_32BIT,
                                  sctx->wait_mem_scratch, va,
                                  sctx->wait_mem_number, SI_NOT_QUERY);
                si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff,
                               WAIT_REG_MEM_EQUAL);
        }
/* Make sure ME is idle (it executes most packets) before continuing.
         * This prevents read-after-write hazards between PFP and ME.
         */
-       if (cp_coher_cntl ||
-           (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
-                           SI_CONTEXT_INV_VMEM_L1 |
-                           SI_CONTEXT_INV_GLOBAL_L2 |
-                           SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
+       if (sctx->has_graphics &&
+           (cp_coher_cntl ||
+            (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
+                      SI_CONTEXT_INV_VMEM_L1 |
+                      SI_CONTEXT_INV_GLOBAL_L2 |
+                      SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
                radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
                radeon_emit(cs, 0);
        }
/* SI-CI-VI only:
         *   When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC
         *   waits for idle, so it should be last. SURFACE_SYNC is done in PFP.
         *
         * cp_coher_cntl should contain all necessary flags except TC flags
         * at this point.
diff --git a/src/gallium/drivers/radeonsi/si_texture.c 
b/src/gallium/drivers/radeonsi/si_texture.c
index a50088d2d8f..581f90a7b2f 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct si_screen 
*sscreen,
   *   compressed tiled
   *
   * \param sctx  the current context if you have one, or sscreen->aux_context
   *              if you don't.
   */
  bool si_texture_disable_dcc(struct si_context *sctx,
                            struct si_texture *tex)
  {
        struct si_screen *sscreen = sctx->screen;
+ if (!sctx->has_graphics)
+               return si_texture_discard_dcc(sscreen, tex);
+
        if (!si_can_disable_dcc(tex))
                return false;
if (&sctx->b == sscreen->aux_context)
                mtx_lock(&sscreen->aux_context_lock);
/* Decompress DCC. */
        si_decompress_dcc(sctx, tex);
        sctx->b.flush(&sctx->b, NULL, 0);

--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to