Module: Mesa
Branch: main
Commit: cb7403b90955308ed76cb7bebe0dbc23f15fac81
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb7403b90955308ed76cb7bebe0dbc23f15fac81

Author: Giancarlo Devich <[email protected]>
Date:   Fri Feb 24 16:20:21 2023 -0800

d3d12: Track up to 16 active context resource states locally in d3d12_bo

After 16 entries, we fall back to the previous logic that used a hash
map to link the resource's state per context.

Preventing hash map churn by cheaply tracking up to 16 contexts' worth
of states per resource significantly reduces CPU cost in
find_or_create_state_entry.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21528>

---

 src/gallium/drivers/d3d12/d3d12_bufmgr.cpp         | 16 +++++++--
 src/gallium/drivers/d3d12/d3d12_bufmgr.h           |  3 ++
 src/gallium/drivers/d3d12/d3d12_resource_state.cpp | 38 ++++++++++++++--------
 src/gallium/drivers/d3d12/d3d12_resource_state.h   |  3 ++
 4 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp 
b/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp
index 85414ff9a83..4df0bcfb718 100644
--- a/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp
@@ -81,9 +81,10 @@ d3d12_bo_wrap_res(struct d3d12_screen *screen, 
ID3D12Resource *res, enum d3d12_r
 {
    struct d3d12_bo *bo;
 
-   bo = CALLOC_STRUCT(d3d12_bo);
+   bo = MALLOC_STRUCT(d3d12_bo);
    if (!bo)
       return NULL;
+   memset(bo, 0, offsetof(d3d12_bo, local_context_states));
 
    D3D12_RESOURCE_DESC desc = GetDesc(res);
    unsigned array_size = desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D 
? 1 : desc.DepthOrArraySize;
@@ -158,9 +159,10 @@ d3d12_bo_wrap_buffer(struct d3d12_screen *screen, struct 
pb_buffer *buf)
 {
    struct d3d12_bo *bo;
 
-   bo = CALLOC_STRUCT(d3d12_bo);
+   bo = MALLOC_STRUCT(d3d12_bo);
    if (!bo)
       return NULL;
+   memset(bo, 0, offsetof(d3d12_bo, local_context_states));
 
    pipe_reference_init(&bo->reference, 1);
    bo->screen = screen;
@@ -193,13 +195,21 @@ d3d12_bo_unreference(struct d3d12_bo *bo)
       /* MSVC's offsetof fails when the name is ambiguous between struct and 
function */
       typedef struct d3d12_context d3d12_context_type;
       list_for_each_entry(d3d12_context_type, ctx, &bo->screen->context_list, 
context_list_entry)
-         util_dynarray_append(&ctx->recently_destroyed_bos, uint64_t, 
bo->unique_id);
+         if (ctx->id == D3D12_CONTEXT_NO_ID)
+            util_dynarray_append(&ctx->recently_destroyed_bos, uint64_t, 
bo->unique_id);
 
       mtx_unlock(&bo->screen->submit_mutex);
 
       d3d12_resource_state_cleanup(&bo->global_state);
       if (bo->res)
          bo->res->Release();
+
+      uint64_t mask = bo->local_context_state_mask;
+      while (mask) {
+         int ctxid = u_bit_scan64(&mask);
+         
d3d12_destroy_context_state_table_entry(&bo->local_context_states[ctxid]);
+      }
+
       FREE(bo);
    }
 }
diff --git a/src/gallium/drivers/d3d12/d3d12_bufmgr.h 
b/src/gallium/drivers/d3d12/d3d12_bufmgr.h
index d34076f96e6..74639a415e5 100644
--- a/src/gallium/drivers/d3d12/d3d12_bufmgr.h
+++ b/src/gallium/drivers/d3d12/d3d12_bufmgr.h
@@ -59,6 +59,9 @@ struct d3d12_bo {
    int64_t last_used_timestamp;
    uint64_t last_used_fence;
    enum d3d12_residency_status residency_status;
+
+   unsigned local_context_state_mask;
+   d3d12_context_state_table_entry local_context_states[16];
 };
 
 struct d3d12_buffer {
diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp 
b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp
index 65c91a9ce6b..c978b4670d0 100644
--- a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp
@@ -187,13 +187,12 @@ copy_resource_state(d3d12_resource_state *dest, 
d3d12_resource_state *src)
    }
 }
 
-static void
-destroy_context_state_table_entry(d3d12_context_state_table_entry *entry)
+void
+d3d12_destroy_context_state_table_entry(d3d12_context_state_table_entry *entry)
 {
    desired_resource_state_cleanup(&entry->desired);
    d3d12_resource_state_cleanup(&entry->batch_begin);
    d3d12_resource_state_cleanup(&entry->batch_end);
-   free(entry);
 }
 
 void
@@ -206,8 +205,10 @@ d3d12_context_state_table_init(struct d3d12_context *ctx)
 void
 d3d12_context_state_table_destroy(struct d3d12_context *ctx)
 {
-   hash_table_foreach(ctx->bo_state_table->table, entry)
-      destroy_context_state_table_entry((d3d12_context_state_table_entry 
*)entry->data);
+   hash_table_foreach(ctx->bo_state_table->table, entry) {
+      d3d12_destroy_context_state_table_entry((d3d12_context_state_table_entry 
*)entry->data);
+      free(entry->data);
+   }
    _mesa_hash_table_u64_destroy(ctx->bo_state_table);
    util_dynarray_fini(&ctx->barrier_scratch);
    if (ctx->state_fixup_cmdlist)
@@ -240,17 +241,28 @@ init_state_table_entry(d3d12_context_state_table_entry 
*bo_state, d3d12_bo *bo)
    /* We'll never need state fixups for simultaneous access resources, so 
don't bother initializing this second state */
    if (!supports_simultaneous_access)
       d3d12_resource_state_init(&bo_state->batch_begin, subresource_count, 
supports_simultaneous_access);
+   else
+      memset(&bo_state->batch_begin, 0, sizeof(bo_state->batch_begin));
 }
 
 static d3d12_context_state_table_entry *
-find_or_create_state_entry(struct hash_table_u64 *table, d3d12_bo *bo)
+find_or_create_state_entry(struct d3d12_context *ctx, d3d12_bo *bo)
 {
+   if (ctx->id != D3D12_CONTEXT_NO_ID) {
+      unsigned context_bit = 1 << ctx->id;
+      if ((bo->local_context_state_mask & context_bit) == 0) {
+         init_state_table_entry(&bo->local_context_states[ctx->id], bo);
+         bo->local_context_state_mask |= context_bit;
+      }
+      return &bo->local_context_states[ctx->id];
+   }
+
    d3d12_context_state_table_entry *bo_state =
-      (d3d12_context_state_table_entry *) _mesa_hash_table_u64_search(table, 
bo->unique_id);
+      (d3d12_context_state_table_entry *) 
_mesa_hash_table_u64_search(ctx->bo_state_table, bo->unique_id);
    if (!bo_state) {
       bo_state = CALLOC_STRUCT(d3d12_context_state_table_entry);
       init_state_table_entry(bo_state, bo);
-      _mesa_hash_table_u64_insert(table, bo->unique_id, bo_state);
+      _mesa_hash_table_u64_insert(ctx->bo_state_table, bo->unique_id, 
bo_state);
    }
    return bo_state;
 }
@@ -330,7 +342,7 @@ d3d12_context_state_resolve_submission(struct d3d12_context 
*ctx, struct d3d12_b
    util_dynarray_foreach(&ctx->recently_destroyed_bos, uint64_t, id) {
       void *data = _mesa_hash_table_u64_search(ctx->bo_state_table, *id);
       if (data)
-         destroy_context_state_table_entry((d3d12_context_state_table_entry 
*)data);
+         
d3d12_destroy_context_state_table_entry((d3d12_context_state_table_entry 
*)data);
       _mesa_hash_table_u64_remove(ctx->bo_state_table, *id);
    }
 
@@ -338,7 +350,7 @@ d3d12_context_state_resolve_submission(struct d3d12_context 
*ctx, struct d3d12_b
 
    hash_table_foreach(batch->bos, bo_entry) {
       d3d12_bo *bo = (d3d12_bo *)bo_entry->key;
-      d3d12_context_state_table_entry *bo_state = 
find_or_create_state_entry(ctx->bo_state_table, bo);
+      d3d12_context_state_table_entry *bo_state = 
find_or_create_state_entry(ctx, bo);
       if (!bo_state->batch_end.supports_simultaneous_access) {
          assert(bo->res && bo->global_state.subresource_states);
 
@@ -447,7 +459,7 @@ d3d12_transition_resource_state(struct d3d12_context *ctx,
    if (flags & D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS)
       d3d12_invalidate_context_bindings(ctx, res);
 
-   d3d12_context_state_table_entry *state_entry = 
find_or_create_state_entry(ctx->bo_state_table, res->bo);
+   d3d12_context_state_table_entry *state_entry = 
find_or_create_state_entry(ctx, res->bo);
    if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) {
       set_desired_resource_state(&state_entry->desired, state);
       _mesa_set_add(ctx->pending_barriers_bos, res->bo);
@@ -472,7 +484,7 @@ d3d12_transition_subresources_state(struct d3d12_context 
*ctx,
    if(flags & D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS)
       d3d12_invalidate_context_bindings(ctx, res);
 
-   d3d12_context_state_table_entry *state_entry = 
find_or_create_state_entry(ctx->bo_state_table, res->bo);
+   d3d12_context_state_table_entry *state_entry = 
find_or_create_state_entry(ctx, res->bo);
    bool is_whole_resource = num_levels * num_layers * num_planes == 
state_entry->batch_end.num_subresources;
    bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0;
 
@@ -509,7 +521,7 @@ d3d12_apply_resource_states(struct d3d12_context *ctx, bool 
is_implicit_dispatch
    set_foreach_remove(ctx->pending_barriers_bos, entry) {
       d3d12_bo *bo = (d3d12_bo *)entry->key;
 
-      d3d12_context_state_table_entry *state_entry = 
find_or_create_state_entry(ctx->bo_state_table, bo);
+      d3d12_context_state_table_entry *state_entry = 
find_or_create_state_entry(ctx, bo);
       d3d12_desired_resource_state *destination_state = &state_entry->desired;
       d3d12_resource_state *current_state = &state_entry->batch_end;
 
diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.h 
b/src/gallium/drivers/d3d12/d3d12_resource_state.h
index b85d90df727..d2e208f34ce 100644
--- a/src/gallium/drivers/d3d12/d3d12_resource_state.h
+++ b/src/gallium/drivers/d3d12/d3d12_resource_state.h
@@ -98,4 +98,7 @@ d3d12_context_state_table_destroy(struct d3d12_context *ctx);
 bool
 d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct 
d3d12_batch *batch);
 
+void
+d3d12_destroy_context_state_table_entry(d3d12_context_state_table_entry* 
entry);
+
 #endif // D3D12_RESOURCE_STATE_H

Reply via email to