Module: Mesa
Branch: main
Commit: 31f720fd6e3f6f5a56597b952cd82696f3b26837
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=31f720fd6e3f6f5a56597b952cd82696f3b26837

Author: Paulo Zanoni <paulo.r.zan...@intel.com>
Date:   Mon Oct 23 15:35:42 2023 -0700

anv/trtt: join L1 writes into a single MI_STORE_DATA_IMM when possible

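If the addresses are sequential, we can emit a single
MI_STORE_DATA_IMM instruction instead of one instruction per write.
This is a very common case and it should save us some space: each
extra_write then costs only 4 bytes (one additional data dword) rather
than a whole extra instruction.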

Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Signed-off-by: Paulo Zanoni <paulo.r.zan...@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25512>

---

 src/intel/vulkan/genX_cmd_buffer.c | 42 ++++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 496b7ed8557..99e0d4acea2 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -8441,6 +8441,15 @@ genX(write_trtt_entries)(struct anv_trtt_submission *submit)
       .end = (void *)cmds + batch_size,
    };
 
+   /* BSpec says:
+    *   "DWord Length programmed must not exceed 0x3FE."
+    * For a single dword write the programmed length is 2, and for a single
+    * qword it's 3. This is the value we actually write to the register field,
+    * so it's not considering the bias.
+    */
+   uint32_t dword_write_len = 2;
+   uint32_t max_dword_extra_writes = 0x3FE - dword_write_len;
+
    /* TODO: writes to contiguous addresses can be combined into a single big
     * MI_STORE_DATA_IMM instruction.
     */
@@ -8458,14 +8467,35 @@ genX(write_trtt_entries)(struct anv_trtt_submission *submit)
    }
 
    for (int i = 0; i < submit->l1_binds_len; i++) {
-      bool is_last_write = i + 1 == submit->l1_binds_len;
+      int extra_writes = 0;
+      for (int j = i + 1;
+           j < submit->l1_binds_len && extra_writes <= max_dword_extra_writes;
+           j++) {
+         if (submit->l1_binds[i].pte_addr + (j - i) * 4 ==
+             submit->l1_binds[j].pte_addr) {
+            extra_writes++;
+         } else {
+            break;
+         }
+      }
+
+      bool is_last_write = i + extra_writes + 1 == submit->l1_binds_len;
 
-      anv_batch_emit(&batch, GENX(MI_STORE_DATA_IMM), sdi) {
-         sdi.ForceWriteCompletionCheck = is_last_write;
-         sdi.Address = anv_address_from_u64(submit->l1_binds[i].pte_addr);
-         sdi.ImmediateData =
-            (submit->l1_binds[i].entry_addr >> 16) & 0xFFFFFFFF;
+      uint32_t dword_full_len = GENX(MI_STORE_DATA_IMM_length_bias) +
+                                dword_write_len + extra_writes;
+      uint32_t *dw;
+      dw = anv_batch_emitn(&batch, dword_full_len, GENX(MI_STORE_DATA_IMM),
+         .ForceWriteCompletionCheck = is_last_write,
+         .Address = anv_address_from_u64(submit->l1_binds[i].pte_addr),
+      );
+      dw += 3;
+      for (int j = 0; j < extra_writes + 1; j++) {
+         *dw = (submit->l1_binds[i + j].entry_addr >> 16) & 0xFFFFFFFF;
+         dw++;
       }
+      assert(dw == batch.next);
+
+      i += extra_writes;
    }
 
    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);

Reply via email to