Module: Mesa
Branch: master
Commit: 4970aa55771e41ca0eb6dd8a1d707bb846c0d694
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4970aa55771e41ca0eb6dd8a1d707bb846c0d694
Author: Connor Abbott <[email protected]> Date: Thu Oct 29 15:05:24 2020 +0100 ir3: Initial support for private memory Add information that the driver will need to setup registers, and implement support for load_scratch/store_scratch using private memory. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7386> --- src/freedreno/ir3/ir3.c | 12 ++++++++ src/freedreno/ir3/ir3.h | 5 +++ src/freedreno/ir3/ir3_compiler_nir.c | 59 ++++++++++++++++++++++++++++++++++++ src/freedreno/ir3/ir3_nir.c | 1 + src/freedreno/ir3/ir3_shader.c | 5 +++ src/freedreno/ir3/ir3_shader.h | 5 +++ 6 files changed, 87 insertions(+) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 0f5a0c24812..431c75754f3 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -729,6 +729,11 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL; } + if ((instr->opc == OPC_STP || instr->opc == OPC_LDP) && + src2->iim_val * type_size(instr->cat6.type) > 32) { + info->multi_dword_ldp_stp = true; + } + /* TODO we need a more comprehensive list about which instructions * can be encoded which way. 
Or possibly use IR3_INSTR_0 flag to * indicate to use the src_off encoding even if offset is zero @@ -938,6 +943,7 @@ void * ir3_assemble(struct ir3_shader_variant *v) info->max_reg = -1; info->max_half_reg = -1; info->max_const = -1; + info->multi_dword_ldp_stp = false; uint32_t instr_count = 0; foreach_block (block, &shader->block_list) { @@ -1464,6 +1470,12 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, if ((instr->opc == OPC_STL) && (n != 2)) return false; + if ((instr->opc == OPC_LDP) && (n == 0)) + return false; + + if ((instr->opc == OPC_STP) && (n != 2)) + return false; + if (instr->opc == OPC_STLW && n == 0) return false; diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 262f2a28dcf..c5021cc6dfd 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -64,6 +64,7 @@ struct ir3_info { int8_t max_reg; /* highest GPR # used by shader */ int8_t max_half_reg; int16_t max_const; + bool multi_dword_ldp_stp; /* number of sync bits: */ uint16_t ss, sy; @@ -400,6 +401,8 @@ struct ir3_instruction { IR3_BARRIER_BUFFER_W = 1 << 6, IR3_BARRIER_ARRAY_R = 1 << 7, IR3_BARRIER_ARRAY_W = 1 << 8, + IR3_BARRIER_PRIVATE_R = 1 << 9, + IR3_BARRIER_PRIVATE_W = 1 << 10, } barrier_class, barrier_conflict; /* Entry in ir3_block's instruction list: */ @@ -1692,9 +1695,11 @@ INSTR2(LDLV) INSTR3(LDG) INSTR3(LDL) INSTR3(LDLW) +INSTR3(LDP) INSTR3(STG) INSTR3(STL) INSTR3(STLW) +INSTR3(STP) INSTR1(RESINFO) INSTR1(RESFMT) INSTR2(ATOMIC_ADD) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 154666af8fa..a05b1d9f6b1 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1052,6 +1052,57 @@ emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) return atomic; } +/* src[] = { offset }. 
*/ +static void +emit_intrinsic_load_scratch(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *ldp, *offset; + + offset = ir3_get_src(ctx, &intr->src[0])[0]; + + ldp = ir3_LDP(b, offset, 0, + create_immed(b, intr->num_components), 0, + create_immed(b, 0), 0); + + ldp->cat6.type = utype_dst(intr->dest); + ldp->regs[0]->wrmask = MASK(intr->num_components); + + ldp->barrier_class = IR3_BARRIER_PRIVATE_R; + ldp->barrier_conflict = IR3_BARRIER_PRIVATE_W; + + ir3_split_dest(b, dst, ldp, 0, intr->num_components); +} + +/* src[] = { value, offset }. const_index[] = { write_mask } */ +static void +emit_intrinsic_store_scratch(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *stp, *offset; + struct ir3_instruction * const *value; + unsigned wrmask, ncomp; + + value = ir3_get_src(ctx, &intr->src[0]); + offset = ir3_get_src(ctx, &intr->src[1])[0]; + + wrmask = nir_intrinsic_write_mask(intr); + ncomp = ffs(~wrmask) - 1; + + assert(wrmask == BITFIELD_MASK(intr->num_components)); + + stp = ir3_STP(b, offset, 0, + ir3_create_collect(ctx, value, ncomp), 0, + create_immed(b, ncomp), 0); + stp->cat6.dst_offset = 0; + stp->cat6.type = utype_src(intr->src[0]); + stp->barrier_class = IR3_BARRIER_PRIVATE_W; + stp->barrier_conflict = IR3_BARRIER_PRIVATE_R | IR3_BARRIER_PRIVATE_W; + + array_insert(b, b->keeps, stp); +} + struct tex_src_info { /* For prefetch */ unsigned tex_base, samp_base, tex_idx, samp_idx; @@ -1714,6 +1765,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_shared_atomic_comp_swap: dst[0] = emit_intrinsic_atomic_shared(ctx, intr); break; + case nir_intrinsic_load_scratch: + emit_intrinsic_load_scratch(ctx, intr, dst); + break; + case nir_intrinsic_store_scratch: + emit_intrinsic_store_scratch(ctx, intr); + break; case nir_intrinsic_image_load: 
emit_intrinsic_load_image(ctx, intr, dst); break; @@ -3347,6 +3404,8 @@ emit_instructions(struct ir3_context *ctx) ctx->so->cull_mask = MASK(ctx->s->info.cull_distance_array_size) << ctx->s->info.clip_distance_array_size; + ctx->so->pvtmem_size = ctx->s->scratch_size; + /* NOTE: need to do something more clever when we support >1 fxn */ nir_foreach_register (reg, &fxn->registers) { ir3_declare_array(ctx, reg); diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index d6d891a9560..2faa802deb1 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -261,6 +261,7 @@ should_split_wrmask(const nir_instr *instr, const void *data) case nir_intrinsic_store_ssbo: case nir_intrinsic_store_shared: case nir_intrinsic_store_global: + case nir_intrinsic_store_scratch: return true; default: return false; diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 4cbe5f56e7d..da430340949 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -144,6 +144,11 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v) if (compiler->gpu_id >= 400) v->constlen = align(v->constlen, 4); + /* Use the per-wave layout by default on a6xx. It should result in better + * performance when loads/stores are to a uniform index. + */ + v->pvtmem_per_wave = compiler->gpu_id >= 600 && !v->info.multi_dword_ldp_stp; + fixup_regfootprint(v); return bin; diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index bba3c627da3..6b5e2affede 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -554,6 +554,11 @@ struct ir3_shader_variant { */ unsigned constlen; + /* The private memory size in bytes */ + unsigned pvtmem_size; + /* Whether we should use the new per-wave layout rather than per-fiber. 
*/ + bool pvtmem_per_wave; + /* About Linkage: * + Let the frag shader determine the position/compmask for the * varyings, since it is the place where we know if the varying _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
