Module: Mesa
Branch: main
Commit: acba08b58f11fe6b716ea46189ae597150abc16a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=acba08b58f11fe6b716ea46189ae597150abc16a

Author: Connor Abbott <[email protected]>
Date:   Tue Sep 21 16:50:04 2021 +0200

ir3: Implement and document ldc.k

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13148>

---

 src/compiler/nir/nir_intrinsics.py   |  5 +++
 src/freedreno/ir3/disasm-a3xx.c      |  1 +
 src/freedreno/ir3/instr-a3xx.h       |  2 +
 src/freedreno/ir3/ir3.h              |  1 +
 src/freedreno/ir3/ir3_compiler_nir.c | 29 ++++++++++++++
 src/freedreno/ir3/ir3_lexer.l        |  1 +
 src/freedreno/ir3/ir3_parser.y       |  8 +++-
 src/freedreno/ir3/ir3_print.c        |  2 +
 src/freedreno/ir3/ir3_validate.c     |  4 ++
 src/freedreno/ir3/tests/disasm.c     |  5 +++
 src/freedreno/isa/ir3-cat6.xml       | 78 ++++++++++++++++++++++++------------
 11 files changed, 109 insertions(+), 27 deletions(-)

diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 1bb4c178b8b..04192896b09 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1149,6 +1149,11 @@ barrier("preamble_end_ir3")
 # IR3-specific intrinsic for stc. Should be used in the shader preamble.
 store("uniform_ir3", [], indices=[BASE])
 
+# IR3-specific intrinsic for ldc.k. Copies a range of a UBO to the const file.
+# src[] = { ubo index, ubo offset }. BASE is the const file base in components,
+# RANGE is the number of vec4s to copy.
+intrinsic("copy_ubo_to_uniform_ir3", [1, 1], indices=[BASE, RANGE])
+
 # DXIL specific intrinsics
 # src[] = { value, mask, index, offset }.
 intrinsic("store_ssbo_masked_dxil", [1, 1, 1, 1])
diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index 224ef79b400..4f57000c898 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -395,6 +395,7 @@ static const struct opc_info {
    OPC(6, OPC_GETWID,       getwid),
    OPC(6, OPC_GETFIBERID,   getfiberid),
    OPC(6, OPC_STC,          stc),
+   OPC(6, OPC_LDC_K,        ldc.k),
 
    OPC(6, OPC_SPILL_MACRO,  spill.macro),
    OPC(6, OPC_RELOAD_MACRO, reload.macro),
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index aee2af83740..78a65e37c4e 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -355,6 +355,8 @@ typedef enum {
    OPC_SPILL_MACRO     = _OPC(6, 79),
    OPC_RELOAD_MACRO    = _OPC(6, 80),
 
+   OPC_LDC_K           = _OPC(6, 81),
+
    /* category 7: */
    OPC_BAR             = _OPC(7, 0),
    OPC_FENCE           = _OPC(7, 1),
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index ca46d3bd4ca..2d94c3a7d2b 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -2389,6 +2389,7 @@ INSTR2(QUAD_SHUFFLE_BRCST)
 INSTR1(QUAD_SHUFFLE_HORIZ)
 INSTR1(QUAD_SHUFFLE_VERT)
 INSTR1(QUAD_SHUFFLE_DIAG)
+INSTR2NODST(LDC_K)
 INSTR2NODST(STC)
 #if GPU >= 600
 INSTR3NODST(STIB);
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index ecaf8e77fb3..a3d09fdfe52 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -899,6 +899,32 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr,
    ir3_split_dest(b, dst, ldc, 0, ncomp);
 }
 
+static void
+emit_intrinsic_copy_ubo_to_uniform(struct ir3_context *ctx,
+                                   nir_intrinsic_instr *intr)
+{
+   struct ir3_block *b = ctx->block;
+   /* Lowers copy_ubo_to_uniform_ir3 to one ldc.k (UBO -> const file copy). */
+   unsigned base = nir_intrinsic_base(intr);
+   unsigned size = nir_intrinsic_range(intr);
+   /* The const file base is not a source operand; it goes through a1.x. */
+   struct ir3_instruction *addr1 = ir3_get_addr1(ctx, base);
+   /* NIR src[0] selects the UBO, src[1] is the offset into it. */
+   struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[1])[0];
+   struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0];
+   struct ir3_instruction *ldc = ir3_LDC_K(b, idx, 0, offset, 0);
+   ldc->cat6.iim_val = size; /* number of vec4s; encoded as size - 1 */
+   ldc->barrier_class = ldc->barrier_conflict = IR3_BARRIER_CONST_W;
+   /* NOTE(review): presumably sets IR3_INSTR_B for bindless UBOs - confirm. */
+   ir3_handle_bindless_cat6(ldc, intr->src[0]);
+   if (ldc->flags & IR3_INSTR_B)
+      ctx->so->bindless_ubo = true;
+
+   ir3_instr_set_address(ldc, addr1);
+   /* No dst (INSTR2NODST); added to b->keeps, presumably to survive DCE. */
+   array_insert(b, b->keeps, ldc);
+}
+
 /* handles direct/indirect UBO reads: */
 static void
 emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
@@ -2128,6 +2154,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    case nir_intrinsic_load_ubo_vec4:
       emit_intrinsic_load_ubo_ldc(ctx, intr, dst);
       break;
+   case nir_intrinsic_copy_ubo_to_uniform_ir3:
+      emit_intrinsic_copy_ubo_to_uniform(ctx, intr);
+      break;
    case nir_intrinsic_load_frag_coord:
       ir3_split_dest(b, dst, get_frag_coord(ctx, intr), 0, 4);
       break;
diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l
index 181a17a90e6..52b97789645 100644
--- a/src/freedreno/ir3/ir3_lexer.l
+++ b/src/freedreno/ir3/ir3_lexer.l
@@ -414,6 +414,7 @@ static int parse_reg(const char *str)
 "p"                               return 'p';
 "s2en"                            return TOKEN(T_S2EN);
 "s"                               return 's';
+"k"                               return 'k';
 "base"[0-9]+                      ir3_yylval.num = strtol(yytext+4, NULL, 10); return T_BASE;
 "offset"[0-9]+                    ir3_yylval.num = strtol(yytext+6, NULL, 10); return T_OFFSET;
 "uniform"                         return T_UNIFORM;
diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y
index 2aaebd91221..fd29c639da0 100644
--- a/src/freedreno/ir3/ir3_parser.y
+++ b/src/freedreno/ir3/ir3_parser.y
@@ -1229,8 +1229,12 @@ cat6_bindless_ibo: cat6_bindless_ibo_opc_1src cat6_typed cat6_dim cat6_type '.'
 
 cat6_bindless_ldc_opc: T_OP_LDC  { new_instr(OPC_LDC); }
 
-cat6_bindless_ldc: cat6_bindless_ldc_opc '.' T_OFFSET '.' cat6_immed '.' cat6_bindless_mode dst_reg ',' cat6_reg_or_immed ',' cat6_reg_or_immed {
-                      instr->cat6.d = $3;
+/* This is separated from the opcode to avoid lookahead/shift-reduce conflicts */
+cat6_bindless_ldc_middle:
+                        T_OFFSET '.' cat6_immed '.' cat6_bindless_mode dst_reg { instr->cat6.d = $1; }
+|                       cat6_immed '.' 'k' '.' cat6_bindless_mode 'c' '[' T_A1 ']' { instr->opc = OPC_LDC_K; }
+
+cat6_bindless_ldc: cat6_bindless_ldc_opc '.' cat6_bindless_ldc_middle ',' cat6_reg_or_immed ',' cat6_reg_or_immed {
                       instr->cat6.type = TYPE_U32;
                       /* TODO cleanup ir3 src order: */
                       swap(instr->srcs[0], instr->srcs[1]);
diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c
index ba6405382e3..e85513d3ea9 100644
--- a/src/freedreno/ir3/ir3_print.c
+++ b/src/freedreno/ir3/ir3_print.c
@@ -215,6 +215,8 @@ print_instr_name(struct log_stream *stream, struct ir3_instruction *instr,
          mesa_log_stream_printf(stream, ".a1en");
       if (instr->opc == OPC_LDC)
          mesa_log_stream_printf(stream, ".offset%d", instr->cat6.d);
+      if (instr->opc == OPC_LDC_K)
+         mesa_log_stream_printf(stream, ".%d", instr->cat6.iim_val);
       if (instr->flags & IR3_INSTR_B) {
          mesa_log_stream_printf(
             stream, ".base%d",
diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c
index 84be40ca795..9c6d5ddcd90 100644
--- a/src/freedreno/ir3/ir3_validate.c
+++ b/src/freedreno/ir3/ir3_validate.c
@@ -350,6 +350,10 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
          validate_reg_size(ctx, instr->srcs[0], instr->cat6.type);
          validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
          break;
+      case OPC_LDC_K:
+         validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
+         validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
+         break;
       default:
          validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
          validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index ae68ffc621c..f06e77c1929 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -348,6 +348,11 @@ static const struct test {
    INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
    INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
 
+   /* dEQP-VK.glsl.conditionals.if.if_else_vertex */
+   INSTR_6XX(c0360000_00c78100, "ldc.1.k.imm.base0 c[a1.x], 0, 0"), /* ldc.1.k.mode4.base0 c[a1.x], 0, 0 */
+   /* custom */
+   INSTR_6XX(c0360003_00c78100, "ldc.4.k.imm.base0 c[a1.x], 0, 0"), /* ldc.4.k.mode4.base0 c[a1.x], 0, 0 */
+
    /* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */
    INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"),
    INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"),
diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml
index 4227d558dcf..9d6048b1040 100644
--- a/src/freedreno/isa/ir3-cat6.xml
+++ b/src/freedreno/isa/ir3-cat6.xml
@@ -878,27 +878,8 @@ SOFTWARE.
        </encode>
 </bitset>
 
-<bitset name="ldc" extends="#instruction-cat6-a6xx">
-       <doc>
-               LoaD Constant - UBO load
-       </doc>
-       <override>
-               <!-- TODO.. wtf? -->
-               <expr>{K}</expr>
-               <display>
-                       {SY}{JP}{NAME}.{TYPE_SIZE}.k.{MODE}{BASE} c[a1.x], {SRC1}, {SRC2}
-               </display>
-               <field   low="32" high="39" name="TYPE_SIZE_MINUS_ONE" type="uint"/>
-       </override>
-       <!--
-       TODO are these *really* all bindless?  Or does that bit have a different
-       meaning?  Maybe I don't have enough ldc examples from deqp-glesN
-        -->
-       <display>
-               {SY}{JP}{NAME}.offset{OFFSET}.{TYPE_SIZE}.{MODE}{BASE} {DST}, {SRC1}, {SRC2}
-       </display>
+<bitset name="#cat6-ldc-common" extends="#instruction-cat6-a6xx">
        <pattern pos="0"           >x</pattern>
-       <field   low="9"  high="10" name="OFFSET" type="uint"/>   <!-- D_MINUS_ONE -->
        <pattern pos="11"          >x</pattern>        <!-- TYPED -->
        <pattern low="14" high="19">011110</pattern>   <!-- OPC -->
        <pattern low="20" high="22">1xx</pattern>
@@ -910,19 +891,62 @@ SOFTWARE.
        <field   low="24" high="31" name="SRC1" type="#cat6-src">
                <param name="SRC1_IM" as="SRC_IM"/>
        </field>
-       <field   low="32" high="39" name="DST" type="#reg-gpr"/>
        <pattern low="49" high="51">x11</pattern>      <!-- TYPE -->
-       <field   pos="52"           name="K" type="bool"/>
-       <pattern pos="53"          >1</pattern>
        <encode>
-               <map name="K">0</map>  <!-- TODO.. once we figure out what this is -->
                <map name="SRC1_IM">!!(src->srcs[1]->flags &amp; IR3_REG_IMMED)</map>
-               <map name="OFFSET">src->cat6.d</map>
                <map name="SRC1">src->srcs[1]</map>
                <map name="SRC2">src->srcs[0]</map>
        </encode>
 </bitset>
 
+<bitset name="ldc.k" extends="#cat6-ldc-common">
+       <doc>
+               ldc.k copies a series of UBO values to constants. In other
+               words, it acts the same as a series of ldc followed by stc. It's
+               also similar to a CP_LOAD_STATE with a UBO source but executed
+               in the shader.
+
+               Like CP_LOAD_STATE, the UBO offset and const file offset must be
+               a multiple of 4 vec4's but it can load any number of vec4's. The
+               UBO descriptor and offset are the same as a normal ldc. The
+               const file offset is specified in a1.x and is in units of
+               components, and the number of vec4's to copy is specified in
+               LOAD_SIZE.
+       </doc>
+       <display>
+               {SY}{JP}ldc.{LOAD_SIZE}.k.{MODE}{BASE} c[a1.x], {SRC1}, {SRC2}
+       </display>
+
+       <derived name="LOAD_SIZE" expr="#cat6-load-size" type="uint"/>
+
+       <field   low="32" high="39" name="LOAD_SIZE_MINUS_ONE" type="uint"/>
+       <pattern low="9"  high="10">xx</pattern>   <!-- D_MINUS_ONE -->
+       <pattern low="52" high="53">11</pattern>
+
+       <encode>
+               <map name="LOAD_SIZE_MINUS_ONE">src->cat6.iim_val - 1</map>
+       </encode>
+</bitset>
+
+<bitset name="ldc" extends="#cat6-ldc-common">
+       <doc>
+               LoaD Constant - UBO load
+       </doc>
+       <!--
+       TODO are these *really* all bindless?  Or does that bit have a different
+       meaning?  Maybe I don't have enough ldc examples from deqp-glesN
+        -->
+       <display>
+               {SY}{JP}{NAME}.offset{OFFSET}.{TYPE_SIZE}.{MODE}{BASE} {DST}, {SRC1}, {SRC2}
+       </display>
+       <field   low="9"  high="10" name="OFFSET" type="uint"/>   <!-- D_MINUS_ONE -->
+       <field   low="32" high="39" name="DST" type="#reg-gpr"/>
+       <pattern low="52" high="53">10</pattern>
+       <encode>
+               <map name="OFFSET">src->cat6.d</map>
+       </encode>
+</bitset>
+
 <bitset name="getspid" extends="#instruction-cat6-a6xx">
        <doc>
                GET Shader Processor ID?
@@ -1135,6 +1159,10 @@ SOFTWARE.
        {TYPE_SIZE_MINUS_ONE} + 1
 </expr>
 
+<expr name="#cat6-load-size">
+       {LOAD_SIZE_MINUS_ONE} + 1
+</expr>
+
 <bitset name="#cat6-typed" size="1">
        <override>
                <expr>{TYPED}</expr>

Reply via email to