Module: Mesa
Branch: main
Commit: 09ae2c4fee791e3008fbb2f1f218505e571fb29c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=09ae2c4fee791e3008fbb2f1f218505e571fb29c

Author: Mark Collins <[email protected]>
Date:   Tue Sep  6 05:55:04 2022 +0000

tu: Optimize hash_renderpass_instance by removing XXH64_update

Testing showed that `XXH64_update` is significantly slower than
calling `XXH64` directly as far as small data velocity is concerned.
`hash_renderpass_instance` is called at the end of every render pass
(RP), which made it visible while profiling, but the substantial
difference from this change (measured to be ~4x) means it no longer
shows up at all.

Signed-off-by: Mark Collins <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18428>

---

 src/freedreno/vulkan/tu_autotune.c | 36 +++++++++++-------------------------
 src/freedreno/vulkan/tu_pass.c     | 25 ++++++++++++++++++++++++-
 src/freedreno/vulkan/tu_pass.h     |  1 +
 3 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/src/freedreno/vulkan/tu_autotune.c b/src/freedreno/vulkan/tu_autotune.c
index a4be26a77ef..3663d79ee75 100644
--- a/src/freedreno/vulkan/tu_autotune.c
+++ b/src/freedreno/vulkan/tu_autotune.c
@@ -144,40 +144,26 @@ free_submission_data(struct tu_submission_data *data)
    free(data);
 }
 
-#define APPEND_TO_HASH(state, field) \
-   XXH64_update(state, &field, sizeof(field));
-
 static uint64_t
 hash_renderpass_instance(const struct tu_render_pass *pass,
                          const struct tu_framebuffer *framebuffer,
                          const struct tu_cmd_buffer *cmd) {
-   XXH64_state_t hash_state;
-   XXH64_reset(&hash_state, 0);
-
-   APPEND_TO_HASH(&hash_state, framebuffer->width);
-   APPEND_TO_HASH(&hash_state, framebuffer->height);
-   APPEND_TO_HASH(&hash_state, framebuffer->layers);
+   uint32_t data[3 + pass->attachment_count * 5];
+   uint32_t* ptr = data;
 
-   APPEND_TO_HASH(&hash_state, pass->attachment_count);
-   XXH64_update(&hash_state, pass->attachments, pass->attachment_count * 
sizeof(pass->attachments[0]));
+   *ptr++ = framebuffer->width;
+   *ptr++ = framebuffer->height;
+   *ptr++ = framebuffer->layers;
 
    for (unsigned i = 0; i < pass->attachment_count; i++) {
-      APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->view.width);
-      APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->view.height);
-      APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->image->vk.format);
-      APPEND_TO_HASH(&hash_state, 
cmd->state.attachments[i]->image->vk.array_layers);
-      APPEND_TO_HASH(&hash_state, 
cmd->state.attachments[i]->image->vk.mip_levels);
-   }
-
-   APPEND_TO_HASH(&hash_state, pass->subpass_count);
-   for (unsigned i = 0; i < pass->subpass_count; i++) {
-      APPEND_TO_HASH(&hash_state, pass->subpasses[i].samples);
-      APPEND_TO_HASH(&hash_state, pass->subpasses[i].input_count);
-      APPEND_TO_HASH(&hash_state, pass->subpasses[i].color_count);
-      APPEND_TO_HASH(&hash_state, pass->subpasses[i].resolve_count);
+      *ptr++ = cmd->state.attachments[i]->view.width;
+      *ptr++ = cmd->state.attachments[i]->view.height;
+      *ptr++ = cmd->state.attachments[i]->image->vk.format;
+      *ptr++ = cmd->state.attachments[i]->image->vk.array_layers;
+      *ptr++ = cmd->state.attachments[i]->image->vk.mip_levels;
    }
 
-   return XXH64_digest(&hash_state);
+   return XXH64(data, sizeof(data), pass->autotune_hash);
 }
 
 static void
diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c
index 38017e68851..84c1c306129 100644
--- a/src/freedreno/vulkan/tu_pass.c
+++ b/src/freedreno/vulkan/tu_pass.c
@@ -510,6 +510,27 @@ static void update_samples(struct tu_subpass *subpass,
    subpass->samples = samples;
 }
 
+static void
+tu_render_pass_calc_hash(struct tu_render_pass *pass)
+{
+   #define HASH(hash, data) XXH64(&(data), sizeof(data), hash)
+
+   uint64_t hash = HASH(0, pass->attachment_count);
+   hash = XXH64(pass->attachments,
+         pass->attachment_count * sizeof(pass->attachments[0]), hash);
+   hash = HASH(hash, pass->subpass_count);
+   for (unsigned i = 0; i < pass->subpass_count; i++) {
+      hash = HASH(hash, pass->subpasses[i].samples);
+      hash = HASH(hash, pass->subpasses[i].input_count);
+      hash = HASH(hash, pass->subpasses[i].color_count);
+      hash = HASH(hash, pass->subpasses[i].resolve_count);
+   }
+
+   pass->autotune_hash = hash;
+
+   #undef HASH
+}
+
 static void
 tu_render_pass_cond_config(struct tu_render_pass *pass)
 {
@@ -926,13 +947,14 @@ tu_CreateRenderPass2(VkDevice _device,
    tu_render_pass_cond_config(pass);
    tu_render_pass_gmem_config(pass, device->physical_device);
    tu_render_pass_bandwidth_config(pass);
+   tu_render_pass_calc_hash(pass);
 
    for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
       tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
    }
 
    tu_render_pass_add_implicit_deps(pass, pCreateInfo);
- 
+
    *pRenderPass = tu_render_pass_to_handle(pass);
 
    return VK_SUCCESS;
@@ -1092,6 +1114,7 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
    tu_render_pass_cond_config(pass);
    tu_render_pass_gmem_config(pass, device->physical_device);
    tu_render_pass_bandwidth_config(pass);
+   tu_render_pass_calc_hash(pass);
 }
 
 void
diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h
index a47e0e871be..06f1185a115 100644
--- a/src/freedreno/vulkan/tu_pass.h
+++ b/src/freedreno/vulkan/tu_pass.h
@@ -100,6 +100,7 @@ struct tu_render_pass
    uint32_t subpass_count;
    uint32_t gmem_pixels[TU_GMEM_LAYOUT_COUNT];
    uint32_t tile_align_w;
+   uint64_t autotune_hash;
 
    /* memory bandwidth costs (in bytes) for gmem / sysmem rendering */
    uint32_t gmem_bandwidth_per_pixel;

Reply via email to