From: Marek Olšák <marek.ol...@amd.com> --- src/amd/common/ac_gpu_info.c | 6 + src/amd/common/ac_gpu_info.h | 2 + src/gallium/drivers/r600/r600_pipe_common.c | 4 +- src/gallium/drivers/radeon/radeon_winsys.h | 36 ++- src/gallium/drivers/radeonsi/si_fence.c | 4 +- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 214 +++++++++++++++++- src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 13 ++ src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 3 +- 8 files changed, 272 insertions(+), 10 deletions(-)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 6971e4f0a8e..4d9f6afca01 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -398,6 +398,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, info->drm_minor >= 13; info->has_2d_tiling = true; info->has_read_registers_query = true; + info->has_scheduled_fence_dependency = info->drm_minor >= 28; info->num_render_backends = amdinfo->rb_pipes; /* The value returned by the kernel driver was wrong. */ @@ -463,6 +464,9 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, assert(ib_align); info->ib_start_alignment = ib_align; + info->has_gds_ordered_append = info->chip_class >= CIK && + info->drm_minor >= 29 && + HAVE_LLVM >= 0x0800; return true; } @@ -562,6 +566,8 @@ void ac_print_gpu_info(struct radeon_info *info) printf(" has_sparse_vm_mappings = %u\n", info->has_sparse_vm_mappings); printf(" has_2d_tiling = %u\n", info->has_2d_tiling); printf(" has_read_registers_query = %u\n", info->has_read_registers_query); + printf(" has_gds_ordered_append = %u\n", info->has_gds_ordered_append); + printf(" has_scheduled_fence_dependency = %u\n", info->has_scheduled_fence_dependency); printf("Shader core info:\n"); printf(" max_shader_clock = %i\n", info->max_shader_clock); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 2c2389eaaa7..bb6984451e7 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -113,6 +113,8 @@ struct radeon_info { bool has_sparse_vm_mappings; bool has_2d_tiling; bool has_read_registers_query; + bool has_gds_ordered_append; + bool has_scheduled_fence_dependency; /* Shader cores. 
*/ uint32_t r600_max_quad_pipes; /* wave size / 16 */ diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index abfa250435d..3c00ad691ac 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -355,8 +355,8 @@ static void r600_add_fence_dependency(struct r600_common_context *rctx, struct radeon_winsys *ws = rctx->ws; if (rctx->dma.cs) - ws->cs_add_fence_dependency(rctx->dma.cs, fence); - ws->cs_add_fence_dependency(rctx->gfx.cs, fence); + ws->cs_add_fence_dependency(rctx->dma.cs, fence, 0); + ws->cs_add_fence_dependency(rctx->gfx.cs, fence, 0); } static void r600_fence_server_sync(struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index aec91c8d002..c04c014bd2f 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -67,6 +67,16 @@ enum radeon_bo_flag { /* bitfield */ RADEON_FLAG_32BIT = (1 << 6), }; +enum radeon_dependency_flag { + /* Add the dependency to the parallel compute IB only. */ + RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY = 1 << 0, + + /* Instead of waiting for a job to finish execution, the dependency will + * be signaled when the job starts execution. + */ + RADEON_DEPENDENCY_START_FENCE = 1 << 1, +}; + enum radeon_bo_usage { /* bitfield */ RADEON_USAGE_READ = 2, RADEON_USAGE_WRITE = 4, @@ -486,6 +496,27 @@ struct radeon_winsys { void *flush_ctx, bool stop_exec_on_failure); + /** + * Add a parallel compute IB to a gfx IB. It will share the buffer list + * and fence dependencies with the gfx IB. The gfx flush call will submit + * both IBs at the same time. + * + * The compute IB doesn't have an output fence, so the primary IB has + * to use a wait packet for synchronization. + * + * The returned IB is only a stream for writing packets to the new + * IB. 
Calling other winsys functions with it is not allowed, not even + * "cs_destroy". Use the gfx IB instead. + * + * \param cs Gfx IB + * \param gds_size Amount of GDS memory that will be available for this IB. + * \param num_oa_counters Number of GDS ordered append counters that will + * be available for this IB. + */ + struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs, + unsigned gds_size, + unsigned num_oa_counters); + /** * Destroy a command stream. * @@ -608,9 +639,12 @@ struct radeon_winsys { /** * Add a fence dependency to the CS, so that the CS will wait for * the fence before execution. + * + * \param dependency_flags Bitmask of RADEON_DEPENDENCY_* */ void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs, - struct pipe_fence_handle *fence); + struct pipe_fence_handle *fence, + unsigned dependency_flags); /** * Signal a syncobj when the CS finishes execution. diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index 3d23597413c..e3c1e0959fd 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -178,8 +178,8 @@ static void si_add_fence_dependency(struct si_context *sctx, struct radeon_winsys *ws = sctx->ws; if (sctx->dma_cs) - ws->cs_add_fence_dependency(sctx->dma_cs, fence); - ws->cs_add_fence_dependency(sctx->gfx_cs, fence); + ws->cs_add_fence_dependency(sctx->dma_cs, fence, 0); + ws->cs_add_fence_dependency(sctx->gfx_cs, fence, 0); } static void si_add_syncobj_signal(struct si_context *sctx, diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 4a588d52930..1438b1ffe76 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -35,6 +35,14 @@ DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false) +#ifndef AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) +#endif + +#ifndef 
AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 +#endif + /* FENCES */ static struct pipe_fence_handle * @@ -717,6 +725,7 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib, static unsigned amdgpu_ib_max_submit_dwords(enum ib_type ib_type) { + /* The maximum IB size including all chained IBs. */ switch (ib_type) { case IB_MAIN: /* Smaller submits means the GPU gets busy sooner and there is less @@ -724,6 +733,9 @@ static unsigned amdgpu_ib_max_submit_dwords(enum ib_type ib_type) * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1 */ return 20 * 1024; + case IB_PARALLEL_COMPUTE: + /* Always chain this IB. */ + return UINT_MAX; default: unreachable("bad ib_type"); } @@ -739,12 +751,15 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs, */ struct amdgpu_ib *ib = NULL; struct drm_amdgpu_cs_chunk_ib *info = &cs->csc->ib[ib_type]; - unsigned ib_size = 0; + /* This is the minimum size of a contiguous IB. 
*/ + unsigned ib_size = 4 * 1024 * 4; switch (ib_type) { + case IB_PARALLEL_COMPUTE: + ib = &cs->compute_ib; + break; case IB_MAIN: ib = &cs->main; - ib_size = 4 * 1024 * 4; break; default: unreachable("unhandled IB type"); @@ -866,6 +881,9 @@ static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws, assert(0); } + cs->ib[IB_PARALLEL_COMPUTE].ip_type = AMDGPU_HW_IP_COMPUTE; + cs->ib[IB_PARALLEL_COMPUTE].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE; + memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist)); cs->last_added_bo = NULL; return true; @@ -897,6 +915,8 @@ static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs) cleanup_fence_list(&cs->fence_dependencies); cleanup_fence_list(&cs->syncobj_dependencies); cleanup_fence_list(&cs->syncobj_to_signal); + cleanup_fence_list(&cs->compute_fence_dependencies); + cleanup_fence_list(&cs->compute_start_fence_dependencies); cs->num_real_buffers = 0; cs->num_slab_buffers = 0; @@ -916,6 +936,8 @@ static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs) FREE(cs->fence_dependencies.list); FREE(cs->syncobj_dependencies.list); FREE(cs->syncobj_to_signal.list); + FREE(cs->compute_fence_dependencies.list); + FREE(cs->compute_start_fence_dependencies.list); } @@ -949,6 +971,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, amdgpu_cs_chunk_fence_info_to_data(&fence_info, (void*)&cs->fence_chunk); cs->main.ib_type = IB_MAIN; + cs->compute_ib.ib_type = IB_PARALLEL_COMPUTE; if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ring_type)) { FREE(cs); @@ -976,6 +999,77 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, return &cs->main.base; } +static struct radeon_cmdbuf * +amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib, + unsigned gds_size, unsigned num_oa_counters) +{ + struct amdgpu_cs *cs = (struct amdgpu_cs*)ib; + struct amdgpu_winsys *ws = cs->ctx->ws; + + if (cs->ring_type != RING_GFX) + return NULL; + + /* only one secondary IB can be added */ + if 
(cs->compute_ib.ib_mapped) + return NULL; + + assert(gds_size || !num_oa_counters); /* OA requires GDS */ + + amdgpu_bo_handle gds_mem = NULL, gds_oa = NULL; + + /* Optionally allocate GDS resources for the IB. */ + if (gds_size) { + struct amdgpu_bo_alloc_request gds_mem_info = {0}, gds_oa_info = {0}; + + gds_mem_info.alloc_size = gds_size; + gds_mem_info.phys_alignment = 4; + gds_mem_info.preferred_heap = AMDGPU_GEM_DOMAIN_GDS; + + gds_oa_info.alloc_size = num_oa_counters; + gds_oa_info.phys_alignment = 1; + gds_oa_info.preferred_heap = AMDGPU_GEM_DOMAIN_OA; + + if (amdgpu_bo_alloc(ws->dev, &gds_mem_info, &gds_mem)) { + fprintf(stderr, "amdgpu: Failed to create a GDS memory buffer.\n"); + return NULL; + } + + if (num_oa_counters && + amdgpu_bo_alloc(ws->dev, &gds_oa_info, &gds_oa)) { + fprintf(stderr, "amdgpu: Failed to create a GDS OA buffer.\n"); + amdgpu_bo_free(gds_mem); + return NULL; + } + } + + /* Allocate the compute IB. */ + if (!amdgpu_get_new_ib(&ws->base, cs, IB_PARALLEL_COMPUTE)) { + if (gds_mem) + amdgpu_bo_free(gds_mem); + if (gds_oa) + amdgpu_bo_free(gds_oa); + return NULL; + } + + if (gds_mem) { + cs->compute_gds_mem = gds_mem; + cs->compute_gds_oa = gds_oa; + + amdgpu_bo_export(gds_mem, amdgpu_bo_handle_type_kms, + &cs->compute_gds_mem_kms_handle); + if (gds_oa) { + amdgpu_bo_export(gds_oa, amdgpu_bo_handle_type_kms, + &cs->compute_gds_oa_kms_handle); + + cs->csc1.ib[IB_PARALLEL_COMPUTE].flags |= + AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID; + cs->csc2.ib[IB_PARALLEL_COMPUTE].flags |= + AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID; + } + } + return &cs->compute_ib.base; +} + static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs) { return true; @@ -1105,6 +1199,11 @@ static void add_fence_to_list(struct amdgpu_fence_list *fences, amdgpu_fence_reference(&fences->list[idx], (struct pipe_fence_handle*)fence); } +/* TODO: recognizing dependencies as no-ops doesn't take the parallel + * compute IB into account. The compute IB won't wait for these. 
+ * Also, the scheduler can execute compute and SDMA IBs on any rings. + * Should we always insert dependencies? + */ static bool is_noop_fence_dependency(struct amdgpu_cs *acs, struct amdgpu_fence *fence) { @@ -1121,7 +1220,8 @@ static bool is_noop_fence_dependency(struct amdgpu_cs *acs, } static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws, - struct pipe_fence_handle *pfence) + struct pipe_fence_handle *pfence, + unsigned dependency_flags) { struct amdgpu_cs *acs = amdgpu_cs(rws); struct amdgpu_cs_context *cs = acs->csc; @@ -1129,6 +1229,21 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws, util_queue_fence_wait(&fence->submitted); + if (dependency_flags & RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY) { + /* Syncobjs are not needed here. */ + assert(!amdgpu_fence_is_syncobj(fence)); + + if (acs->ctx->ws->info.has_scheduled_fence_dependency && + dependency_flags & RADEON_DEPENDENCY_START_FENCE) + add_fence_to_list(&cs->compute_start_fence_dependencies, fence); + else + add_fence_to_list(&cs->compute_fence_dependencies, fence); + return; + } + + /* Start fences are not needed here. 
*/ + assert(!(dependency_flags & RADEON_DEPENDENCY_START_FENCE)); + if (is_noop_fence_dependency(acs, fence)) return; @@ -1325,7 +1440,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) } struct drm_amdgpu_bo_list_entry *list = - alloca(cs->num_real_buffers * sizeof(struct drm_amdgpu_bo_list_entry)); + alloca((cs->num_real_buffers + 2) * sizeof(struct drm_amdgpu_bo_list_entry)); unsigned num_handles = 0; for (i = 0; i < cs->num_real_buffers; ++i) { @@ -1341,6 +1456,18 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) ++num_handles; } + if (acs->compute_gds_mem) { + list[num_handles].bo_handle = acs->compute_gds_mem_kms_handle; + list[num_handles].bo_priority = 0; + ++num_handles; + + if (acs->compute_gds_oa) { + list[num_handles].bo_handle = acs->compute_gds_oa_kms_handle; + list[num_handles].bo_priority = 0; + ++num_handles; + } + } + if (use_bo_list_create) { /* Legacy path creating the buffer list handle and passing it to the CS ioctl. */ r = amdgpu_bo_list_create_raw(ws->dev, num_handles, list, &bo_list); @@ -1418,6 +1545,66 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) num_chunks++; } + /* Submit the parallel compute IB first. */ + if (cs->ib[IB_PARALLEL_COMPUTE].ib_bytes > 0) { + unsigned old_num_chunks = num_chunks; + + /* Add compute fence dependencies. 
*/ + unsigned num_dependencies = cs->compute_fence_dependencies.num; + if (num_dependencies) { + struct drm_amdgpu_cs_chunk_dep *dep_chunk = + alloca(num_dependencies * sizeof(*dep_chunk)); + + for (unsigned i = 0; i < num_dependencies; i++) { + struct amdgpu_fence *fence = + (struct amdgpu_fence*)cs->compute_fence_dependencies.list[i]; + + assert(util_queue_fence_is_signalled(&fence->submitted)); + amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]); + } + + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES; + chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies; + chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk; + num_chunks++; + } + + /* Add compute start fence dependencies. */ + unsigned num_start_dependencies = cs->compute_start_fence_dependencies.num; + if (num_start_dependencies) { + struct drm_amdgpu_cs_chunk_dep *dep_chunk = + alloca(num_start_dependencies * sizeof(*dep_chunk)); + + for (unsigned i = 0; i < num_start_dependencies; i++) { + struct amdgpu_fence *fence = + (struct amdgpu_fence*)cs->compute_start_fence_dependencies.list[i]; + + assert(util_queue_fence_is_signalled(&fence->submitted)); + amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]); + } + + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES; + chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_start_dependencies; + chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk; + num_chunks++; + } + + /* Convert from dwords to bytes. */ + cs->ib[IB_PARALLEL_COMPUTE].ib_bytes *= 4; + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB; + chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; + chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PARALLEL_COMPUTE]; + num_chunks++; + + r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list, + num_chunks, chunks, NULL); + if (r) + goto finalize; + + /* Back off the compute chunks. */ + num_chunks = old_num_chunks; + } + /* Syncobj signals. 
*/ unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num; if (num_syncobj_to_signal) { @@ -1459,6 +1646,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list, num_chunks, chunks, &seq_no); } +finalize: if (r) { if (r == -ENOMEM) @@ -1544,6 +1732,12 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, } if (cs->ring_type == RING_GFX) ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4; + + /* Also pad secondary IBs. */ + if (cs->compute_ib.ib_mapped) { + while (cs->compute_ib.base.current.cdw & 7) + radeon_emit(&cs->compute_ib.base, 0xffff1000); /* type3 nop packet */ + } break; case RING_UVD: case RING_UVD_ENC: @@ -1579,6 +1773,9 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, /* Set IB sizes. */ amdgpu_ib_finalize(ws, &cs->main); + if (cs->compute_ib.ib_mapped) + amdgpu_ib_finalize(ws, &cs->compute_ib); + /* Create a fence. */ amdgpu_fence_reference(&cur->fence, NULL); if (cs->next_fence) { @@ -1624,6 +1821,8 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, } amdgpu_get_new_ib(&ws->base, cs, IB_MAIN); + if (cs->compute_ib.ib_mapped) + amdgpu_get_new_ib(&ws->base, cs, IB_PARALLEL_COMPUTE); cs->main.base.used_gart = 0; cs->main.base.used_vram = 0; @@ -1645,9 +1844,15 @@ static void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs) p_atomic_dec(&cs->ctx->ws->num_cs); pb_reference(&cs->main.big_ib_buffer, NULL); FREE(cs->main.base.prev); + pb_reference(&cs->compute_ib.big_ib_buffer, NULL); + FREE(cs->compute_ib.base.prev); amdgpu_destroy_cs_context(&cs->csc1); amdgpu_destroy_cs_context(&cs->csc2); amdgpu_fence_reference(&cs->next_fence, NULL); + if (cs->compute_gds_mem) + amdgpu_bo_free(cs->compute_gds_mem); + if (cs->compute_gds_oa) + amdgpu_bo_free(cs->compute_gds_oa); FREE(cs); } @@ -1667,6 +1872,7 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws) ws->base.ctx_destroy = amdgpu_ctx_destroy; ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status; 
ws->base.cs_create = amdgpu_cs_create; + ws->base.cs_add_parallel_compute_ib = amdgpu_cs_add_parallel_compute_ib; ws->base.cs_destroy = amdgpu_cs_destroy; ws->base.cs_add_buffer = amdgpu_cs_add_buffer; ws->base.cs_validate = amdgpu_cs_validate; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 4f49a9065c6..474f4211b8e 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -57,6 +57,7 @@ struct amdgpu_cs_buffer { enum ib_type { IB_MAIN, + IB_PARALLEL_COMPUTE, IB_NUM, }; @@ -113,6 +114,10 @@ struct amdgpu_cs_context { struct amdgpu_fence_list syncobj_dependencies; struct amdgpu_fence_list syncobj_to_signal; + /* The compute IB uses the dependencies above + these: */ + struct amdgpu_fence_list compute_fence_dependencies; + struct amdgpu_fence_list compute_start_fence_dependencies; + struct pipe_fence_handle *fence; /* the error returned from cs_flush for non-async submissions */ @@ -121,6 +126,7 @@ struct amdgpu_cs_context { struct amdgpu_cs { struct amdgpu_ib main; /* must be first because this is inherited */ + struct amdgpu_ib compute_ib; /* optional parallel compute IB */ struct amdgpu_ctx *ctx; enum ring_type ring_type; struct drm_amdgpu_cs_chunk_fence fence_chunk; @@ -142,6 +148,11 @@ struct amdgpu_cs { struct util_queue_fence flush_completed; struct pipe_fence_handle *next_fence; + + amdgpu_bo_handle compute_gds_mem; + amdgpu_bo_handle compute_gds_oa; + uint32_t compute_gds_mem_kms_handle; + uint32_t compute_gds_oa_kms_handle; }; struct amdgpu_fence { @@ -220,6 +231,8 @@ amdgpu_cs_from_ib(struct amdgpu_ib *ib) switch (ib->ib_type) { case IB_MAIN: return get_container(ib, struct amdgpu_cs, main); + case IB_PARALLEL_COMPUTE: + return get_container(ib, struct amdgpu_cs, compute_ib); default: unreachable("bad ib_type"); } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 490c246d6e0..2288c320975 
100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -798,7 +798,8 @@ radeon_drm_cs_get_next_fence(struct radeon_cmdbuf *rcs) static void radeon_drm_cs_add_fence_dependency(struct radeon_cmdbuf *cs, - struct pipe_fence_handle *fence) + struct pipe_fence_handle *fence, + unsigned dependency_flags) { /* TODO: Handle the following unlikely multi-threaded scenario: * -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev