Module: Mesa Branch: main Commit: 56589cb02cf5c9b9b469360413ceb7681d3caf86 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=56589cb02cf5c9b9b469360413ceb7681d3caf86
Author: Jesse Natalie <jenat...@microsoft.com> Date: Tue Nov 7 13:08:04 2023 -0800 d3d12: Change memory barrier implementation Normally, we insert barriers automatically based on bind points. If a resource is bound as both a shader image/SSBO (UAV) and other read-only bind points, we'll choose the UAV state. This behavior is modified by the memory barrier API - if the memory barrier indicates that a read is desired, previously, we would've just not put any barriers to UAV states. Now, we just use that state to disambiguate between read states vs write states for a resource that's bound as both. This turns out to be problematic in some circumstances, e.g.: * PBO download, which does a memory barrier afterwards, since it uses shader images to write, and those might be accessed immediately as a texture afterwards. * A user-issued draw that writes to a SSBO. This should indicate in the state tracker that the SSBO is in the UAV state. * Some other op, like a copy, that writes to the SSBO. Before, the "pending memory barrier" state would cause the user draw to not be put in the UAV state, which means that the copy wouldn't issue a barrier *out* of the UAV state, resulting in the copy being unsynchronized. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26104> --- src/gallium/drivers/d3d12/d3d12_context.h | 1 + src/gallium/drivers/d3d12/d3d12_draw.cpp | 32 +++++++-------- src/gallium/drivers/d3d12/d3d12_resource_state.cpp | 48 ++++++++++++++-------- src/gallium/drivers/d3d12/d3d12_resource_state.h | 1 + 4 files changed, 48 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/d3d12/d3d12_context.h b/src/gallium/drivers/d3d12/d3d12_context.h index 89ff68477de..3f92670031d 100644 --- a/src/gallium/drivers/d3d12/d3d12_context.h +++ b/src/gallium/drivers/d3d12/d3d12_context.h @@ -333,6 +333,7 @@ enum d3d12_transition_flags { D3D12_TRANSITION_FLAG_NONE = 0, D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS = 1, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE = 2, + D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER = 4, }; void diff --git a/src/gallium/drivers/d3d12/d3d12_draw.cpp b/src/gallium/drivers/d3d12/d3d12_draw.cpp index 65fc0d8e1b2..5f4e4f75507 100644 --- a/src/gallium/drivers/d3d12/d3d12_draw.cpp +++ b/src/gallium/drivers/d3d12/d3d12_draw.cpp @@ -326,23 +326,23 @@ fill_image_descriptors(struct d3d12_context *ctx, unreachable("Unexpected image view dimension"); } - if (!batch->pending_memory_barrier) { - if (res->base.b.target == PIPE_BUFFER) { - d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); - } else { - unsigned transition_first_layer = view->u.tex.first_layer; - unsigned transition_array_size = array_size; - if (res->base.b.target == PIPE_TEXTURE_3D) { - transition_first_layer = 0; - transition_array_size = 0; - } - d3d12_transition_subresources_state(ctx, res, - view->u.tex.level, 1, - transition_first_layer, transition_array_size, - 0, 1, - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, - D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); + d3d12_transition_flags transition_flags = (d3d12_transition_flags)(D3D12_TRANSITION_FLAG_ACCUMULATE_STATE | + (batch->pending_memory_barrier ? D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER : 0)); + if (res->base.b.target == PIPE_BUFFER) { + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, transition_flags); + } else { + unsigned transition_first_layer = view->u.tex.first_layer; + unsigned transition_array_size = array_size; + if (res->base.b.target == PIPE_TEXTURE_3D) { + transition_first_layer = 0; + transition_array_size = 0; } + d3d12_transition_subresources_state(ctx, res, + view->u.tex.level, 1, + transition_first_layer, transition_array_size, + 0, 1, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + transition_flags); } d3d12_batch_reference_resource(batch, res, true); diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp index 970c6ec4571..4209bfb7e79 100644 --- a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp +++ b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp @@ -70,17 +70,20 @@ update_subresource_state(D3D12_RESOURCE_STATES *existing_state, D3D12_RESOURCE_S } static void -set_desired_resource_state(d3d12_desired_resource_state *state_obj, D3D12_RESOURCE_STATES state) +set_desired_resource_state(d3d12_desired_resource_state *state_obj, D3D12_RESOURCE_STATES state, bool pending_memory_barrier) { state_obj->homogenous = true; + state_obj->pending_memory_barrier |= pending_memory_barrier; update_subresource_state(&state_obj->subresource_states[0], state); } static void set_desired_subresource_state(d3d12_desired_resource_state *state_obj, - uint32_t subresource, - D3D12_RESOURCE_STATES state) + uint32_t subresource, + D3D12_RESOURCE_STATES state, + bool pending_memory_barrier) { + state_obj->pending_memory_barrier |= pending_memory_barrier; if (state_obj->homogenous && state_obj->num_subresources > 1) { for (unsigned i = 1; i < state_obj->num_subresources; ++i) { state_obj->subresource_states[i] = state_obj->subresource_states[0]; @@ -94,7 +97,8 @@ set_desired_subresource_state(d3d12_desired_resource_state *state_obj, static void reset_desired_resource_state(d3d12_desired_resource_state *state_obj) { - set_desired_resource_state(state_obj, UNKNOWN_RESOURCE_STATE); + set_desired_resource_state(state_obj, UNKNOWN_RESOURCE_STATE, false); + state_obj->pending_memory_barrier = false; } bool @@ -389,7 +393,8 @@ append_barrier(struct d3d12_context *ctx, d3d12_context_state_table_entry *state_entry, D3D12_RESOURCE_STATES after, UINT subresource, - bool is_implicit_dispatch) + bool is_implicit_dispatch, + bool pending_memory_barrier) { uint64_t offset; ID3D12Resource *res = d3d12_bo_get_base(bo, &offset)->res; @@ -400,13 +405,18 @@ append_barrier(struct d3d12_context *ctx, transition_desc.Transition.Subresource = subresource; // This is a transition into a state that is both write and non-write. - // This is invalid according to D3D12. We're venturing into undefined behavior - // land, but let's just pick the write state. + // This is invalid according to D3D12. If there's a pending memory barrier that + // indicates we'll be reading from such a resource, then let the read state win. + // Otherwise, pick the write state. if (d3d12_is_write_state(after) && (after & ~RESOURCE_STATE_ALL_WRITE_BITS) != 0) { - after &= RESOURCE_STATE_ALL_WRITE_BITS; + if (pending_memory_barrier) { + after &= ~RESOURCE_STATE_ALL_WRITE_BITS; + } else { + after &= RESOURCE_STATE_ALL_WRITE_BITS; - // For now, this is the only way I've seen where this can happen. - assert(after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + // For now, this is the only way I've seen where this can happen. + assert(after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } } assert((subresource == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES && current_state->homogenous) || @@ -466,8 +476,9 @@ d3d12_transition_resource_state(struct d3d12_context *ctx, d3d12_invalidate_context_bindings(ctx, res); d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo); + bool pending_memory_barrier = (flags & D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER) != 0; if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) { - set_desired_resource_state(&state_entry->desired, state); + set_desired_resource_state(&state_entry->desired, state, pending_memory_barrier); if (ctx->id != D3D12_CONTEXT_NO_ID) { if ((res->bo->local_needs_resolve_state & (1 << ctx->id)) == 0) { @@ -479,10 +490,10 @@ d3d12_transition_resource_state(struct d3d12_context *ctx, _mesa_set_add(ctx->pending_barriers_bos, res->bo); } else if (state_entry->batch_end.homogenous) { - append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false); + append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false, pending_memory_barrier); } else { for (unsigned i = 0; i < state_entry->batch_end.num_subresources; ++i) { - append_barrier(ctx, res->bo, state_entry, state, i, false); + append_barrier(ctx, res->bo, state_entry, state, i, false, pending_memory_barrier); } } } @@ -502,11 +513,12 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx, d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo); bool is_whole_resource = num_levels * num_layers * num_planes == state_entry->batch_end.num_subresources; bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0; + bool pending_memory_barrier = (flags & D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER) != 0; if (is_whole_resource && is_accumulate) { - set_desired_resource_state(&state_entry->desired, state); + set_desired_resource_state(&state_entry->desired, state, pending_memory_barrier); } else if (is_whole_resource && state_entry->batch_end.homogenous) { - append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false); + append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false, pending_memory_barrier); } else { for (uint32_t l = 0; l < num_levels; l++) { const uint32_t level = start_level + l; @@ -518,9 +530,9 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx, level + (layer * res->mip_levels) + plane * (res->mip_levels * res->base.b.array_size); assert(subres_id < state_entry->desired.num_subresources); if (is_accumulate) - set_desired_subresource_state(&state_entry->desired, subres_id, state); + set_desired_subresource_state(&state_entry->desired, subres_id, state, pending_memory_barrier); else - append_barrier(ctx, res->bo, state_entry, state, subres_id, false); + append_barrier(ctx, res->bo, state_entry, state, subres_id, false, pending_memory_barrier); } } } @@ -558,7 +570,7 @@ static void apply_resource_state(struct d3d12_context *ctx, bool is_implicit_dis continue; } - append_barrier(ctx, bo, state_entry, after, subresource, is_implicit_dispatch); + append_barrier(ctx, bo, state_entry, after, subresource, is_implicit_dispatch, state_entry->desired.pending_memory_barrier); } // Update destination states. diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.h b/src/gallium/drivers/d3d12/d3d12_resource_state.h index d2e208f34ce..5a517c76a17 100644 --- a/src/gallium/drivers/d3d12/d3d12_resource_state.h +++ b/src/gallium/drivers/d3d12/d3d12_resource_state.h @@ -72,6 +72,7 @@ struct d3d12_resource_state struct d3d12_desired_resource_state { bool homogenous; + bool pending_memory_barrier; uint32_t num_subresources; D3D12_RESOURCE_STATES* subresource_states; };