Module: Mesa
Branch: main
Commit: 56589cb02cf5c9b9b469360413ceb7681d3caf86
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=56589cb02cf5c9b9b469360413ceb7681d3caf86

Author: Jesse Natalie <jenat...@microsoft.com>
Date:   Tue Nov  7 13:08:04 2023 -0800

d3d12: Change memory barrier implementation

Normally, we insert barriers automatically based on bind points. If
a resource is bound as both a shader image/SSBO (UAV) and other read-only
bind points, we'll choose the UAV state. This behavior is modified by
the memory barrier API - if the memory barrier indicates that a read
is desired, previously, we would've just not put any barriers to UAV
states. Now, we just use that state to disambiguate between read states
vs write states for a resource that's bound as both.

This turns out to be problematic in some circumstances, e.g.:
* PBO download, which does a memory barrier afterwards, since it uses
  shader images to write, and those might be accessed immediately as
  a texture afterwards.
* A user-issued draw that writes to a SSBO. This should indicate in the
  state tracker that the SSBO is in the UAV state.
* Some other op, like a copy, that writes to the SSBO.

Before, the "pending memory barrier" state would cause the user draw to
not be put in the UAV state, which means that the copy wouldn't issue a
barrier *out* of the UAV state, resulting in the copy being unsynchronized.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26104>

---

 src/gallium/drivers/d3d12/d3d12_context.h          |  1 +
 src/gallium/drivers/d3d12/d3d12_draw.cpp           | 32 +++++++--------
 src/gallium/drivers/d3d12/d3d12_resource_state.cpp | 48 ++++++++++++++--------
 src/gallium/drivers/d3d12/d3d12_resource_state.h   |  1 +
 4 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/d3d12/d3d12_context.h 
b/src/gallium/drivers/d3d12/d3d12_context.h
index 89ff68477de..3f92670031d 100644
--- a/src/gallium/drivers/d3d12/d3d12_context.h
+++ b/src/gallium/drivers/d3d12/d3d12_context.h
@@ -333,6 +333,7 @@ enum d3d12_transition_flags {
    D3D12_TRANSITION_FLAG_NONE = 0,
    D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS = 1,
    D3D12_TRANSITION_FLAG_ACCUMULATE_STATE = 2,
+   D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER = 4,
 };
 
 void
diff --git a/src/gallium/drivers/d3d12/d3d12_draw.cpp 
b/src/gallium/drivers/d3d12/d3d12_draw.cpp
index 65fc0d8e1b2..5f4e4f75507 100644
--- a/src/gallium/drivers/d3d12/d3d12_draw.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_draw.cpp
@@ -326,23 +326,23 @@ fill_image_descriptors(struct d3d12_context *ctx,
             unreachable("Unexpected image view dimension");
          }
          
-         if (!batch->pending_memory_barrier) {
-            if (res->base.b.target == PIPE_BUFFER) {
-               d3d12_transition_resource_state(ctx, res, 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
-            } else {
-               unsigned transition_first_layer = view->u.tex.first_layer;
-               unsigned transition_array_size = array_size;
-               if (res->base.b.target == PIPE_TEXTURE_3D) {
-                  transition_first_layer = 0;
-                  transition_array_size = 0;
-               }
-               d3d12_transition_subresources_state(ctx, res,
-                                                   view->u.tex.level, 1,
-                                                   transition_first_layer, 
transition_array_size,
-                                                   0, 1,
-                                                   
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
-                                                   
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
+         d3d12_transition_flags transition_flags = 
(d3d12_transition_flags)(D3D12_TRANSITION_FLAG_ACCUMULATE_STATE |
+            (batch->pending_memory_barrier ? 
D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER : 0));
+         if (res->base.b.target == PIPE_BUFFER) {
+            d3d12_transition_resource_state(ctx, res, 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, transition_flags);
+         } else {
+            unsigned transition_first_layer = view->u.tex.first_layer;
+            unsigned transition_array_size = array_size;
+            if (res->base.b.target == PIPE_TEXTURE_3D) {
+               transition_first_layer = 0;
+               transition_array_size = 0;
             }
+            d3d12_transition_subresources_state(ctx, res,
+                                                view->u.tex.level, 1,
+                                                transition_first_layer, 
transition_array_size,
+                                                0, 1,
+                                                
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+                                                transition_flags);
          }
          d3d12_batch_reference_resource(batch, res, true);
 
diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp 
b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp
index 970c6ec4571..4209bfb7e79 100644
--- a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp
@@ -70,17 +70,20 @@ update_subresource_state(D3D12_RESOURCE_STATES 
*existing_state, D3D12_RESOURCE_S
 }
 
 static void
-set_desired_resource_state(d3d12_desired_resource_state *state_obj, 
D3D12_RESOURCE_STATES state)
+set_desired_resource_state(d3d12_desired_resource_state *state_obj, 
D3D12_RESOURCE_STATES state, bool pending_memory_barrier)
 {
    state_obj->homogenous = true;
+   state_obj->pending_memory_barrier |= pending_memory_barrier;
    update_subresource_state(&state_obj->subresource_states[0], state);
 }
 
 static void
 set_desired_subresource_state(d3d12_desired_resource_state *state_obj,
-                                    uint32_t subresource,
-                                    D3D12_RESOURCE_STATES state)
+                              uint32_t subresource,
+                              D3D12_RESOURCE_STATES state,
+                              bool pending_memory_barrier)
 {
+   state_obj->pending_memory_barrier |= pending_memory_barrier;
    if (state_obj->homogenous && state_obj->num_subresources > 1) {
       for (unsigned i = 1; i < state_obj->num_subresources; ++i) {
          state_obj->subresource_states[i] = state_obj->subresource_states[0];
@@ -94,7 +97,8 @@ set_desired_subresource_state(d3d12_desired_resource_state 
*state_obj,
 static void
 reset_desired_resource_state(d3d12_desired_resource_state *state_obj)
 {
-   set_desired_resource_state(state_obj, UNKNOWN_RESOURCE_STATE);
+   set_desired_resource_state(state_obj, UNKNOWN_RESOURCE_STATE, false);
+   state_obj->pending_memory_barrier = false;
 }
 
 bool
@@ -389,7 +393,8 @@ append_barrier(struct d3d12_context *ctx,
                d3d12_context_state_table_entry *state_entry,
                D3D12_RESOURCE_STATES after,
                UINT subresource,
-               bool is_implicit_dispatch)
+               bool is_implicit_dispatch,
+               bool pending_memory_barrier)
 {
    uint64_t offset;
    ID3D12Resource *res = d3d12_bo_get_base(bo, &offset)->res;
@@ -400,13 +405,18 @@ append_barrier(struct d3d12_context *ctx,
    transition_desc.Transition.Subresource = subresource;
 
    // This is a transition into a state that is both write and non-write.
-   // This is invalid according to D3D12. We're venturing into undefined 
behavior
-   // land, but let's just pick the write state.
+   // This is invalid according to D3D12. If there's a pending memory barrier 
that
+   // indicates we'll be reading from such a resource, then let the read state 
win.
+   // Otherwise, pick the write state.
    if (d3d12_is_write_state(after) && (after & ~RESOURCE_STATE_ALL_WRITE_BITS) 
!= 0) {
-      after &= RESOURCE_STATE_ALL_WRITE_BITS;
+      if (pending_memory_barrier) {
+         after &= ~RESOURCE_STATE_ALL_WRITE_BITS;
+      } else {
+         after &= RESOURCE_STATE_ALL_WRITE_BITS;
 
-      // For now, this is the only way I've seen where this can happen.
-      assert(after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+         // For now, this is the only way I've seen where this can happen.
+         assert(after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+      }
    }
 
    assert((subresource == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES && 
current_state->homogenous) ||
@@ -466,8 +476,9 @@ d3d12_transition_resource_state(struct d3d12_context *ctx,
       d3d12_invalidate_context_bindings(ctx, res);
 
    d3d12_context_state_table_entry *state_entry = 
find_or_create_state_entry(ctx, res->bo);
+   bool pending_memory_barrier = (flags & 
D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER) != 0;
    if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) {
-      set_desired_resource_state(&state_entry->desired, state);
+      set_desired_resource_state(&state_entry->desired, state, 
pending_memory_barrier);
 
       if (ctx->id != D3D12_CONTEXT_NO_ID) {
          if ((res->bo->local_needs_resolve_state & (1 << ctx->id)) == 0) {
@@ -479,10 +490,10 @@ d3d12_transition_resource_state(struct d3d12_context *ctx,
          _mesa_set_add(ctx->pending_barriers_bos, res->bo);
 
    } else if (state_entry->batch_end.homogenous) {
-      append_barrier(ctx, res->bo, state_entry, state, 
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
+      append_barrier(ctx, res->bo, state_entry, state, 
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false, pending_memory_barrier);
    } else {
       for (unsigned i = 0; i < state_entry->batch_end.num_subresources; ++i) {
-         append_barrier(ctx, res->bo, state_entry, state, i, false);
+         append_barrier(ctx, res->bo, state_entry, state, i, false, 
pending_memory_barrier);
       }
    }
 }
@@ -502,11 +513,12 @@ d3d12_transition_subresources_state(struct d3d12_context 
*ctx,
    d3d12_context_state_table_entry *state_entry = 
find_or_create_state_entry(ctx, res->bo);
    bool is_whole_resource = num_levels * num_layers * num_planes == 
state_entry->batch_end.num_subresources;
    bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0;
+   bool pending_memory_barrier = (flags & 
D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER) != 0;
 
    if (is_whole_resource && is_accumulate) {
-      set_desired_resource_state(&state_entry->desired, state);
+      set_desired_resource_state(&state_entry->desired, state, 
pending_memory_barrier);
    } else if (is_whole_resource && state_entry->batch_end.homogenous) {
-      append_barrier(ctx, res->bo, state_entry, state, 
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
+      append_barrier(ctx, res->bo, state_entry, state, 
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false, pending_memory_barrier);
    } else {
       for (uint32_t l = 0; l < num_levels; l++) {
          const uint32_t level = start_level + l;
@@ -518,9 +530,9 @@ d3d12_transition_subresources_state(struct d3d12_context 
*ctx,
                   level + (layer * res->mip_levels) + plane * (res->mip_levels 
* res->base.b.array_size);
                assert(subres_id < state_entry->desired.num_subresources);
                if (is_accumulate)
-                  set_desired_subresource_state(&state_entry->desired, 
subres_id, state);
+                  set_desired_subresource_state(&state_entry->desired, 
subres_id, state, pending_memory_barrier);
                else
-                  append_barrier(ctx, res->bo, state_entry, state, subres_id, 
false);
+                  append_barrier(ctx, res->bo, state_entry, state, subres_id, 
false, pending_memory_barrier);
             }
          }
       }
@@ -558,7 +570,7 @@ static void apply_resource_state(struct d3d12_context *ctx, 
bool is_implicit_dis
          continue;
       }
 
-      append_barrier(ctx, bo, state_entry, after, subresource, 
is_implicit_dispatch);
+      append_barrier(ctx, bo, state_entry, after, subresource, 
is_implicit_dispatch, state_entry->desired.pending_memory_barrier);
    }
 
    // Update destination states.
diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.h 
b/src/gallium/drivers/d3d12/d3d12_resource_state.h
index d2e208f34ce..5a517c76a17 100644
--- a/src/gallium/drivers/d3d12/d3d12_resource_state.h
+++ b/src/gallium/drivers/d3d12/d3d12_resource_state.h
@@ -72,6 +72,7 @@ struct d3d12_resource_state
 struct d3d12_desired_resource_state
 {
    bool homogenous;
+   bool pending_memory_barrier;
    uint32_t num_subresources;
    D3D12_RESOURCE_STATES* subresource_states;
 };

Reply via email to