Module: Mesa
Branch: main
Commit: b4fef9a7452531920dc1a49ad2e76df7ebf4fa42
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b4fef9a7452531920dc1a49ad2e76df7ebf4fa42

Author: Paulo Zanoni <paulo.r.zan...@intel.com>
Date:   Mon Oct 23 15:42:31 2023 -0700

anv/trtt: also join the L3/L2 writes into a single MI_STORE_DATA_IMM

Same as the L1 case, but this one deals with 64bit entry addresses and
pte addresses.

Consecutive L3/L2 writes are much rarer than L1 writes since they
require some pretty big buffers, but we can still see those cases in the
wild. I just don't think any change will be noticeable though.

Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Signed-off-by: Paulo Zanoni <paulo.r.zan...@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25512>

---

 src/intel/vulkan/genX_cmd_buffer.c | 44 +++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 99e0d4acea2..e04628f0445 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -8448,22 +8448,50 @@ genX(write_trtt_entries)(struct anv_trtt_submission *submit)
     * so it's not considering the bias.
     */
    uint32_t dword_write_len = 2;
+   uint32_t qword_write_len = 3;
    uint32_t max_dword_extra_writes = 0x3FE - dword_write_len;
+   uint32_t max_qword_extra_writes = (0x3FE - qword_write_len) / 2;
 
-   /* TODO: writes to contiguous addresses can be combined into a single big
-    * MI_STORE_DATA_IMM instruction.
+   /* What makes the code below quite complicated is the fact that we can
+    * write multiple values with MI_STORE_DATA_IMM as long as the writes go to
+    * contiguous addresses.
     */
 
    for (int i = 0; i < submit->l3l2_binds_len; i++) {
+      int extra_writes = 0;
+      for (int j = i + 1;
+           j < submit->l3l2_binds_len &&
+            extra_writes <= max_qword_extra_writes;
+           j++) {
+         if (submit->l3l2_binds[i].pte_addr + (j - i) * 8 ==
+             submit->l3l2_binds[j].pte_addr) {
+            extra_writes++;
+         } else {
+            break;
+         }
+      }
       bool is_last_write = submit->l1_binds_len == 0 &&
-                           i + 1 == submit->l3l2_binds_len;
+                           i + extra_writes + 1 == submit->l3l2_binds_len;
 
-      anv_batch_emitn(&batch, 5, GENX(MI_STORE_DATA_IMM),
+      uint32_t total_len = GENX(MI_STORE_DATA_IMM_length_bias) +
+                           qword_write_len + (extra_writes * 2);
+      uint32_t *dw;
+      dw = anv_batch_emitn(&batch, total_len, GENX(MI_STORE_DATA_IMM),
          .ForceWriteCompletionCheck = is_last_write,
          .StoreQword = true,
          .Address = anv_address_from_u64(submit->l3l2_binds[i].pte_addr),
-         .ImmediateData = submit->l3l2_binds[i].entry_addr,
       );
+      dw += 3;
+      for (int j = 0; j < extra_writes + 1; j++) {
+         uint64_t entry_addr_64b = submit->l3l2_binds[i + j].entry_addr;
+         *dw = entry_addr_64b & 0xFFFFFFFF;
+         dw++;
+         *dw = (entry_addr_64b >> 32) & 0xFFFFFFFF;
+         dw++;
+      }
+      assert(dw == batch.next);
+
+      i += extra_writes;
    }
 
    for (int i = 0; i < submit->l1_binds_len; i++) {
@@ -8481,10 +8509,10 @@ genX(write_trtt_entries)(struct anv_trtt_submission *submit)
 
       bool is_last_write = i + extra_writes + 1 == submit->l1_binds_len;
 
-      uint32_t dword_full_len = GENX(MI_STORE_DATA_IMM_length_bias) +
-                                dword_write_len + extra_writes;
+      uint32_t total_len = GENX(MI_STORE_DATA_IMM_length_bias) +
+                           dword_write_len + extra_writes;
       uint32_t *dw;
-      dw = anv_batch_emitn(&batch, dword_full_len, GENX(MI_STORE_DATA_IMM),
+      dw = anv_batch_emitn(&batch, total_len, GENX(MI_STORE_DATA_IMM),
          .ForceWriteCompletionCheck = is_last_write,
          .Address = anv_address_from_u64(submit->l1_binds[i].pte_addr),
       );

Reply via email to