Module: Mesa
Branch: main
Commit: 407f2beb97b3bcfd8eae262c906636664f4802bd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=407f2beb97b3bcfd8eae262c906636664f4802bd

Author: Lionel Landwerlin <[email protected]>
Date:   Sun Jul 24 16:17:17 2022 +0300

intel/fs: port block a64/surface messages to use LSC

v2: Fix up block load/store on surfaces/shared memory (Rohan)

v3: Drop the write-specific size_written case (Rohan)

Signed-off-by: Lionel Landwerlin <[email protected]>
Reviewed-by: Rohan Garg <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17555>

---

 src/intel/compiler/brw_fs_nir.cpp              |  15 ++--
 src/intel/compiler/brw_lower_logical_sends.cpp | 113 ++++++++++++++++++++++++-
 2 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 563c6055992..fa7eafda0a9 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4184,10 +4184,13 @@ fs_visitor::swizzle_nir_scratch_addr(const brw::fs_builder &bld,
 }
 
 static unsigned
-choose_oword_block_size_dwords(unsigned dwords)
+choose_oword_block_size_dwords(const struct intel_device_info *devinfo,
+                               unsigned dwords)
 {
    unsigned block;
-   if (dwords >= 32) {
+   if (devinfo->has_lsc && dwords >= 64) {
+      block = 64;
+   } else if (dwords >= 32) {
       block = 32;
    } else if (dwords >= 16) {
       block = 16;
@@ -5670,7 +5673,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       while (loaded < total) {
          const unsigned block =
-            choose_oword_block_size_dwords(total - loaded);
+            choose_oword_block_size_dwords(devinfo, total - loaded);
          const unsigned block_bytes = block * 4;
 
          const fs_builder &ubld = block == 8 ? ubld8 : ubld16;
@@ -5707,7 +5710,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       while (written < total) {
          const unsigned block =
-            choose_oword_block_size_dwords(total - written);
+            choose_oword_block_size_dwords(devinfo, total - written);
 
          fs_reg srcs[A64_LOGICAL_NUM_SRCS];
          srcs[A64_LOGICAL_ADDRESS] = address;
@@ -5751,7 +5754,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       while (loaded < total) {
          const unsigned block =
-            choose_oword_block_size_dwords(total - loaded);
+            choose_oword_block_size_dwords(devinfo, total - loaded);
          const unsigned block_bytes = block * 4;
 
          srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(block);
@@ -5793,7 +5796,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       while (written < total) {
          const unsigned block =
-            choose_oword_block_size_dwords(total - written);
+            choose_oword_block_size_dwords(devinfo, total - written);
 
          srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(block);
          srcs[SURFACE_LOGICAL_SRC_DATA] =
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index 9f460bb9294..cfcf9a0e8a5 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -1829,6 +1829,79 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
    inst->src[3] = payload2;
 }
 
+static void
+lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
+{
+   const intel_device_info *devinfo = bld.shader->devinfo;
+   assert(devinfo->has_lsc); /* LSC-capable hardware only */
+
+   /* Lower a logical block load/store to an LSC SEND; unpack its sources. */
+   const fs_reg &addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
+   const fs_reg &src = inst->src[SURFACE_LOGICAL_SRC_DATA];
+   const fs_reg &surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
+   const fs_reg &surface_handle = 
inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
+   const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
+   assert(arg.file == IMM); /* arg.ud is read as an immediate below */
+   assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE);
+   assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE);
+
+   const bool is_stateless =
+      surface.file == IMM && (surface.ud == BRW_BTI_STATELESS ||
+                              surface.ud == GFX8_BTI_STATELESS_NON_COHERENT);
+
+   const bool has_side_effects = inst->has_side_effects();
+
+   const bool write = inst->opcode == SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL;
+
+   fs_builder ubld = bld.exec_all().group(1, 0); /* builds ex_desc below */
+   fs_reg ex_desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+   if (is_stateless) { /* stateless: ex_desc from masked brw_vec1_grf(0, 5) */
+      ubld.AND(ex_desc, retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
+                        brw_imm_ud(INTEL_MASK(31, 10)));
+   } else { /* otherwise ex_desc is the surface handle */
+      ubld.MOV(ex_desc, surface_handle);
+   }
+
+   fs_reg data;
+   if (write) { /* only writes have a data payload (src[3]) */
+      const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
+      data = retype(bld.move_to_vgrf(src, src_sz), BRW_REGISTER_TYPE_UD);
+   }
+
+   inst->opcode = SHADER_OPCODE_SEND; /* lowered in place */
+   if (surface.file == IMM && surface.ud == GFX7_BTI_SLM)
+      inst->sfid = GFX12_SFID_SLM;
+   else
+      inst->sfid = GFX12_SFID_UGM;
+   inst->desc = lsc_msg_desc(devinfo,
+                             write ? LSC_OP_STORE : LSC_OP_LOAD,
+                             1 /* exec_size */,
+                             inst->sfid == GFX12_SFID_SLM ?
+                             LSC_ADDR_SURFTYPE_FLAT : LSC_ADDR_SURFTYPE_BSS,
+                             LSC_ADDR_SIZE_A32,
+                             1 /* num_coordinates */,
+                             LSC_DATA_SIZE_D32,
+                             arg.ud /* num_channels */,
+                             true /* transpose */,
+                             LSC_CACHE_LOAD_L1STATE_L3MOCS,
+                             !write /* has_dest */);
+
+   inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
+   inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE;
+   inst->exec_size = 1; /* matches exec_size passed to lsc_msg_desc */
+   inst->ex_mlen = write ? DIV_ROUND_UP(arg.ud, 8) : 0;
+   inst->header_size = 0;
+   inst->send_has_side_effects = has_side_effects;
+   inst->send_is_volatile = !has_side_effects;
+
+   inst->resize_sources(4); /* desc, ex_desc, payload, payload2 */
+
+   inst->src[0] = brw_imm_ud(0); /* desc */
+   inst->src[1] = ex_desc;       /* ex_desc */
+   inst->src[2] = addr;          /* payload */
+   inst->src[3] = data;          /* payload2 */
+}
+
 static void
 lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
 {
@@ -2031,6 +2104,36 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
                                 !inst->dst.is_null());
       break;
    }
+   case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
+   case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
+      inst->exec_size = 1;
+      inst->desc = lsc_msg_desc(devinfo,
+                                LSC_OP_LOAD,
+                                1 /* exec_size */,
+                                LSC_ADDR_SURFTYPE_FLAT,
+                                LSC_ADDR_SIZE_A64,
+                                1 /* num_coordinates */,
+                                LSC_DATA_SIZE_D32,
+                                arg /* num_channels */,
+                                true /* transpose */,
+                                LSC_CACHE_LOAD_L1STATE_L3MOCS,
+                                true /* has_dest */);
+      break;
+   case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
+      inst->exec_size = 1;
+      inst->desc = lsc_msg_desc(devinfo,
+                                LSC_OP_STORE,
+                                1 /* exec_size */,
+                                LSC_ADDR_SURFTYPE_FLAT,
+                                LSC_ADDR_SIZE_A64,
+                                1 /* num_coordinates */,
+                                LSC_DATA_SIZE_D32,
+                                arg /* num_channels */,
+                                true /* transpose */,
+                                LSC_CACHE_LOAD_L1STATE_L3MOCS,
+                                false /* has_dest */);
+
+      break;
    default:
       unreachable("Unknown A64 logical instruction");
    }
@@ -2662,6 +2765,10 @@ fs_visitor::lower_logical_sends()
 
       case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
       case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
+         if (devinfo->has_lsc) {
+            lower_lsc_block_logical_send(ibld, inst);
+            break;
+         }
          lower_surface_block_logical_send(ibld, inst);
          break;
 
@@ -2675,13 +2782,13 @@ fs_visitor::lower_logical_sends()
       case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
       case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
       case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL:
+      case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
+      case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
+      case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
          if (devinfo->has_lsc) {
             lower_lsc_a64_logical_send(ibld, inst);
             break;
          }
-      case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
-      case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
-      case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
          lower_a64_logical_send(ibld, inst);
          break;
 

Reply via email to