Module: Mesa
Branch: main
Commit: 7a4e0a4d351e5e711e9e504c7e367a1e4ce8235e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a4e0a4d351e5e711e9e504c7e367a1e4ce8235e

Author: Alyssa Rosenzweig <[email protected]>
Date:   Thu Sep  8 18:59:35 2022 -0400

agx: Implement texture offsets and comparators

Texture offsets and shadow comparison values get grouped into a vector
passed by register. Comparison values are provided as-is (fp32). Texture
offsets are packed into nibbles, but we can do this on the CPU, as
nonconstant offsets are forbidden in GLSL at least. They're also
forbidden in Vulkan/SPIR-V without ImageGatherExtended/
shaderImageGatherExtended. I'm happy kicking the NIR lowering can down
the line, this commit is complicated enough already.

Passes dEQP-GLES3.functional.shaders.texture_functions.texture.* and
dEQP-GLES3.functional.shaders.texture_functions.textureoffset.*

Signed-off-by: Alyssa Rosenzweig <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18525>

---

 src/asahi/compiler/agx_compile.c   | 45 +++++++++++++++++++++++++++++++++-----
 src/asahi/compiler/agx_compiler.h  |  2 ++
 src/asahi/compiler/agx_opcodes.py  |  8 ++++---
 src/asahi/compiler/agx_optimizer.c |  4 +++-
 src/asahi/compiler/agx_pack.c      | 21 ++++++++++--------
 5 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 0a94c7d3672..57764f5874d 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1020,7 +1020,8 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
              texture = agx_immediate(instr->texture_index),
              sampler = agx_immediate(instr->sampler_index),
              lod = agx_immediate(0),
-             offset = agx_null();
+             compare = agx_null(),
+             packed_offset = agx_null();
 
    bool txf = instr->op == nir_texop_txf;
 
@@ -1085,9 +1086,32 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
          lod = index;
          break;
 
-      case nir_tex_src_ms_index:
-      case nir_tex_src_offset:
       case nir_tex_src_comparator:
+         assert(index.size == AGX_SIZE_32);
+         compare = index;
+         break;
+
+      case nir_tex_src_offset:
+      {
+         assert(instr->src[i].src.is_ssa);
+         nir_ssa_def *def = instr->src[i].src.ssa;
+         uint32_t packed = 0;
+
+         for (unsigned c = 0; c < def->num_components; ++c) {
+            nir_ssa_scalar s = nir_ssa_scalar_resolved(def, c);
+            assert(nir_ssa_scalar_is_const(s) && "no nonconstant offsets");
+
+            int32_t val = nir_ssa_scalar_as_uint(s);
+            assert((val >= -8 && val <= 7) && "out of bounds offset");
+
+            packed |= (val & 0xF) << (4 * c);
+         }
+
+         packed_offset = agx_mov_imm(b, 32, packed);
+         break;
+      }
+
+      case nir_tex_src_ms_index:
       case nir_tex_src_texture_offset:
       case nir_tex_src_sampler_offset:
       default:
@@ -1097,11 +1121,22 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
 
    agx_index dst = agx_dest_index(&instr->dest);
 
-   agx_instr *I = agx_texture_sample_to(b, dst, coords, lod, texture, sampler, offset,
+   /* Pack shadow reference value (compare) and packed offset together */
+   agx_index compare_offset = agx_null();
+
+   if (!agx_is_null(compare) && !agx_is_null(packed_offset))
+      compare_offset = agx_vec2(b, compare, packed_offset);
+   else if (!agx_is_null(packed_offset))
+      compare_offset = packed_offset;
+   else if (!agx_is_null(compare))
+      compare_offset = compare;
+
+   agx_instr *I = agx_texture_sample_to(b, dst, coords, lod, texture, sampler,
+         compare_offset,
          agx_tex_dim(instr->sampler_dim, instr->is_array),
          agx_lod_mode_for_nir(instr->op),
          0xF, /* TODO: wrmask */
-         0);
+         0, !agx_is_null(packed_offset), !agx_is_null(compare));
 
    if (txf)
       I->op = AGX_OPCODE_TEXTURE_LOAD;
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index cd315379a7d..8ac605a3f13 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -319,6 +319,8 @@ typedef struct {
 
    /* TODO: Handle tex ops more efficient */
    enum agx_dim dim : 3;
+   bool offset : 1;
+   bool shadow : 1;
 
    /* Final st_vary op */
    bool last : 1;
diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py
index 019765ad6bb..f1c386077f1 100644
--- a/src/asahi/compiler/agx_opcodes.py
+++ b/src/asahi/compiler/agx_opcodes.py
@@ -94,6 +94,8 @@ MASK = immediate("mask")
 BFI_MASK = immediate("bfi_mask")
 LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
 DIM = immediate("dim", "enum agx_dim")
+OFFSET = immediate("offset", "bool")
+SHADOW = immediate("shadow", "bool")
 SCOREBOARD = immediate("scoreboard")
 ICOND = immediate("icond", "enum agx_icond")
 FCOND = immediate("fcond", "enum agx_fcond")
@@ -197,14 +199,14 @@ op("fcmpsel",
       encoding_32 = (0x02, 0x7F, 8, 10),
       srcs = 4, imms = [FCOND])
 
-# sources are coordinates, LOD, texture, sampler, offset
+# sources are coordinates, LOD, texture, sampler, shadow/offset
 # TODO: anything else?
 op("texture_sample",
       encoding_32 = (0x31, 0x7F, 8, 10), # XXX WRONG SIZE
-      srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD])
+      srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET, SHADOW])
 op("texture_load",
       encoding_32 = (0x71, 0x7F, 8, 10), # XXX WRONG SIZE
-      srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD])
+      srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET])
 
 # sources are base, index
 op("device_load",
diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c
index b63bc178e2e..51d022f7447 100644
--- a/src/asahi/compiler/agx_optimizer.c
+++ b/src/asahi/compiler/agx_optimizer.c
@@ -198,7 +198,9 @@ agx_optimizer_forward(agx_context *ctx)
          agx_optimizer_fmov(defs, I);
 
       /* Inline immediates if we can. TODO: systematic */
-      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE && I->op != AGX_OPCODE_P_COMBINE)
+      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
+          I->op != AGX_OPCODE_P_COMBINE && I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
+          I->op != AGX_OPCODE_TEXTURE_LOAD)
          agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
    }
 
diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c
index 69637ba0ac0..557a50e1940 100644
--- a/src/asahi/compiler/agx_pack.c
+++ b/src/asahi/compiler/agx_pack.c
@@ -77,12 +77,15 @@ agx_pack_sampler(agx_index index, bool *flag)
 }
 
 static unsigned
-agx_pack_sample_offset(agx_index index, bool *flag)
+agx_pack_sample_compare_offset(agx_index index)
 {
-   /* TODO: offsets */
-   assert(index.type == AGX_INDEX_NULL);
-   *flag = 0;
-   return 0;
+   if (index.type == AGX_INDEX_NULL)
+      return 0;
+
+   assert(index.size == AGX_SIZE_32);
+   assert(index.value < 0x100);
+   assert_register_is_aligned(index);
+   return index.value;
 }
 
 static unsigned
@@ -576,18 +579,18 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
       assert(I->mask != 0);
       assert(I->format <= 0x10);
 
-      bool Rt, Ot, Ct, St;
+      bool Rt, Ct, St;
       unsigned Tt;
 
       unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);
       unsigned C = agx_pack_sample_coords(I->src[0], &Ct);
       unsigned T = agx_pack_texture(I->src[2], &Tt);
       unsigned S = agx_pack_sampler(I->src[3], &St);
-      unsigned O = agx_pack_sample_offset(I->src[4], &Ot);
+      unsigned O = agx_pack_sample_compare_offset(I->src[4]);
       unsigned D = agx_pack_lod(I->src[1]);
 
       unsigned U = 0; // TODO: what is sampler ureg?
-      unsigned q1 = 0; // XXX
+      unsigned q1 = I->shadow;
       unsigned q2 = 0; // XXX
       unsigned q3 = 12; // XXX
       unsigned kill = 0; // helper invocation kill bit
@@ -603,7 +606,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
             ((T >> 6) << 14) |
             ((O & BITFIELD_MASK(6)) << 16) |
             (q6 << 22) |
-            (Ot << 27) |
+            (I->offset << 27) |
             ((S >> 6) << 28) |
             ((O >> 6) << 30);
 

Reply via email to