Module: Mesa Branch: main Commit: cb7403b90955308ed76cb7bebe0dbc23f15fac81 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb7403b90955308ed76cb7bebe0dbc23f15fac81
Author: Giancarlo Devich <[email protected]> Date: Fri Feb 24 16:20:21 2023 -0800 d3d12: Track up to 16 active context resource states locally in d3d12_bo. After 16 entries, we fall back to the previous logic that used a hash map to link the resource's state per context. Preventing hash map churn by cheaply tracking up to 16 contexts' worth of states per resource significantly reduces CPU cost in find_or_create_state_entry. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21528> --- src/gallium/drivers/d3d12/d3d12_bufmgr.cpp | 16 +++++++-- src/gallium/drivers/d3d12/d3d12_bufmgr.h | 3 ++ src/gallium/drivers/d3d12/d3d12_resource_state.cpp | 38 ++++++++++++++-------- src/gallium/drivers/d3d12/d3d12_resource_state.h | 3 ++ 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp b/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp index 85414ff9a83..4df0bcfb718 100644 --- a/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp +++ b/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp @@ -81,9 +81,10 @@ d3d12_bo_wrap_res(struct d3d12_screen *screen, ID3D12Resource *res, enum d3d12_r { struct d3d12_bo *bo; - bo = CALLOC_STRUCT(d3d12_bo); + bo = MALLOC_STRUCT(d3d12_bo); if (!bo) return NULL; + memset(bo, 0, offsetof(d3d12_bo, local_context_states)); D3D12_RESOURCE_DESC desc = GetDesc(res); unsigned array_size = desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
1 : desc.DepthOrArraySize; @@ -158,9 +159,10 @@ d3d12_bo_wrap_buffer(struct d3d12_screen *screen, struct pb_buffer *buf) { struct d3d12_bo *bo; - bo = CALLOC_STRUCT(d3d12_bo); + bo = MALLOC_STRUCT(d3d12_bo); if (!bo) return NULL; + memset(bo, 0, offsetof(d3d12_bo, local_context_states)); pipe_reference_init(&bo->reference, 1); bo->screen = screen; @@ -193,13 +195,21 @@ d3d12_bo_unreference(struct d3d12_bo *bo) /* MSVC's offsetof fails when the name is ambiguous between struct and function */ typedef struct d3d12_context d3d12_context_type; list_for_each_entry(d3d12_context_type, ctx, &bo->screen->context_list, context_list_entry) - util_dynarray_append(&ctx->recently_destroyed_bos, uint64_t, bo->unique_id); + if (ctx->id == D3D12_CONTEXT_NO_ID) + util_dynarray_append(&ctx->recently_destroyed_bos, uint64_t, bo->unique_id); mtx_unlock(&bo->screen->submit_mutex); d3d12_resource_state_cleanup(&bo->global_state); if (bo->res) bo->res->Release(); + + uint64_t mask = bo->local_context_state_mask; + while (mask) { + int ctxid = u_bit_scan64(&mask); + d3d12_destroy_context_state_table_entry(&bo->local_context_states[ctxid]); + } + FREE(bo); } } diff --git a/src/gallium/drivers/d3d12/d3d12_bufmgr.h b/src/gallium/drivers/d3d12/d3d12_bufmgr.h index d34076f96e6..74639a415e5 100644 --- a/src/gallium/drivers/d3d12/d3d12_bufmgr.h +++ b/src/gallium/drivers/d3d12/d3d12_bufmgr.h @@ -59,6 +59,9 @@ struct d3d12_bo { int64_t last_used_timestamp; uint64_t last_used_fence; enum d3d12_residency_status residency_status; + + unsigned local_context_state_mask; + d3d12_context_state_table_entry local_context_states[16]; }; struct d3d12_buffer { diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp index 65c91a9ce6b..c978b4670d0 100644 --- a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp +++ b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp @@ -187,13 +187,12 @@ copy_resource_state(d3d12_resource_state *dest, 
d3d12_resource_state *src) } } -static void -destroy_context_state_table_entry(d3d12_context_state_table_entry *entry) +void +d3d12_destroy_context_state_table_entry(d3d12_context_state_table_entry *entry) { desired_resource_state_cleanup(&entry->desired); d3d12_resource_state_cleanup(&entry->batch_begin); d3d12_resource_state_cleanup(&entry->batch_end); - free(entry); } void @@ -206,8 +205,10 @@ d3d12_context_state_table_init(struct d3d12_context *ctx) void d3d12_context_state_table_destroy(struct d3d12_context *ctx) { - hash_table_foreach(ctx->bo_state_table->table, entry) - destroy_context_state_table_entry((d3d12_context_state_table_entry *)entry->data); + hash_table_foreach(ctx->bo_state_table->table, entry) { + d3d12_destroy_context_state_table_entry((d3d12_context_state_table_entry *)entry->data); + free(entry->data); + } _mesa_hash_table_u64_destroy(ctx->bo_state_table); util_dynarray_fini(&ctx->barrier_scratch); if (ctx->state_fixup_cmdlist) @@ -240,17 +241,28 @@ init_state_table_entry(d3d12_context_state_table_entry *bo_state, d3d12_bo *bo) /* We'll never need state fixups for simultaneous access resources, so don't bother initializing this second state */ if (!supports_simultaneous_access) d3d12_resource_state_init(&bo_state->batch_begin, subresource_count, supports_simultaneous_access); + else + memset(&bo_state->batch_begin, 0, sizeof(bo_state->batch_begin)); } static d3d12_context_state_table_entry * -find_or_create_state_entry(struct hash_table_u64 *table, d3d12_bo *bo) +find_or_create_state_entry(struct d3d12_context *ctx, d3d12_bo *bo) { + if (ctx->id != D3D12_CONTEXT_NO_ID) { + unsigned context_bit = 1 << ctx->id; + if ((bo->local_context_state_mask & context_bit) == 0) { + init_state_table_entry(&bo->local_context_states[ctx->id], bo); + bo->local_context_state_mask |= context_bit; + } + return &bo->local_context_states[ctx->id]; + } + d3d12_context_state_table_entry *bo_state = - (d3d12_context_state_table_entry *) 
_mesa_hash_table_u64_search(table, bo->unique_id); + (d3d12_context_state_table_entry *) _mesa_hash_table_u64_search(ctx->bo_state_table, bo->unique_id); if (!bo_state) { bo_state = CALLOC_STRUCT(d3d12_context_state_table_entry); init_state_table_entry(bo_state, bo); - _mesa_hash_table_u64_insert(table, bo->unique_id, bo_state); + _mesa_hash_table_u64_insert(ctx->bo_state_table, bo->unique_id, bo_state); } return bo_state; } @@ -330,7 +342,7 @@ d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct d3d12_b util_dynarray_foreach(&ctx->recently_destroyed_bos, uint64_t, id) { void *data = _mesa_hash_table_u64_search(ctx->bo_state_table, *id); if (data) - destroy_context_state_table_entry((d3d12_context_state_table_entry *)data); + d3d12_destroy_context_state_table_entry((d3d12_context_state_table_entry *)data); _mesa_hash_table_u64_remove(ctx->bo_state_table, *id); } @@ -338,7 +350,7 @@ d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct d3d12_b hash_table_foreach(batch->bos, bo_entry) { d3d12_bo *bo = (d3d12_bo *)bo_entry->key; - d3d12_context_state_table_entry *bo_state = find_or_create_state_entry(ctx->bo_state_table, bo); + d3d12_context_state_table_entry *bo_state = find_or_create_state_entry(ctx, bo); if (!bo_state->batch_end.supports_simultaneous_access) { assert(bo->res && bo->global_state.subresource_states); @@ -447,7 +459,7 @@ d3d12_transition_resource_state(struct d3d12_context *ctx, if (flags & D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS) d3d12_invalidate_context_bindings(ctx, res); - d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo); + d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo); if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) { set_desired_resource_state(&state_entry->desired, state); _mesa_set_add(ctx->pending_barriers_bos, res->bo); @@ -472,7 +484,7 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx, 
if(flags & D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS) d3d12_invalidate_context_bindings(ctx, res); - d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo); + d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo); bool is_whole_resource = num_levels * num_layers * num_planes == state_entry->batch_end.num_subresources; bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0; @@ -509,7 +521,7 @@ d3d12_apply_resource_states(struct d3d12_context *ctx, bool is_implicit_dispatch set_foreach_remove(ctx->pending_barriers_bos, entry) { d3d12_bo *bo = (d3d12_bo *)entry->key; - d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, bo); + d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, bo); d3d12_desired_resource_state *destination_state = &state_entry->desired; d3d12_resource_state *current_state = &state_entry->batch_end; diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.h b/src/gallium/drivers/d3d12/d3d12_resource_state.h index b85d90df727..d2e208f34ce 100644 --- a/src/gallium/drivers/d3d12/d3d12_resource_state.h +++ b/src/gallium/drivers/d3d12/d3d12_resource_state.h @@ -98,4 +98,7 @@ d3d12_context_state_table_destroy(struct d3d12_context *ctx); bool d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct d3d12_batch *batch); +void +d3d12_destroy_context_state_table_entry(d3d12_context_state_table_entry* entry); + #endif // D3D12_RESOURCE_STATE_H
