2017-07-24 10:28 GMT+02:00 Wladimir J. van der Laan <laa...@gmail.com>:
> This patch adds support for large shaders on GC3000. For example the "terrain"
> glmark benchmark with a large fragment shader will work after this.
>
> If the GPU supports ICACHE, shaders larger than the available state area will
> be uploaded to a bo of their own and instructed to be loaded from memory on
> demand. Small shaders will be uploaded in the usual way. This mimics the
> behavior of the blob.
>
> On GPUs that don't support ICACHE, this patch should make no difference.
>
> Signed-off-by: Wladimir J. van der Laan <laa...@gmail.com>
Reviewed-by: Christian Gmeiner <christian.gmei...@gmail.com> > --- > src/gallium/drivers/etnaviv/etnaviv_compiler.c | 3 +- > src/gallium/drivers/etnaviv/etnaviv_compiler.h | 5 +++ > src/gallium/drivers/etnaviv/etnaviv_emit.c | 52 > ++++++++++++++++++-------- > src/gallium/drivers/etnaviv/etnaviv_internal.h | 4 ++ > src/gallium/drivers/etnaviv/etnaviv_screen.c | 4 +- > src/gallium/drivers/etnaviv/etnaviv_shader.c | 45 +++++++++++++++++++++- > 6 files changed, 95 insertions(+), 18 deletions(-) > > diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c > b/src/gallium/drivers/etnaviv/etnaviv_compiler.c > index fbe66d0..0664d52 100644 > --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c > +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c > @@ -2277,7 +2277,7 @@ etna_compile_check_limits(struct etna_compile *c) > /* round up number of uniforms, including immediates, in units of four */ > int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4; > > - if (c->inst_ptr > c->specs->max_instructions) { > + if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) { > DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr, > c->specs->max_instructions); > return false; > @@ -2501,6 +2501,7 @@ etna_compile_shader(struct etna_shader_variant *v) > v->vs_pointsize_out_reg = -1; > v->ps_color_out_reg = -1; > v->ps_depth_out_reg = -1; > + v->needs_icache = c->inst_ptr > c->specs->max_instructions; > copy_uniform_state_to_shader(c, v); > > if (c->info.processor == PIPE_SHADER_VERTEX) { > diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.h > b/src/gallium/drivers/etnaviv/etnaviv_compiler.h > index 88a093f..f5c1689 100644 > --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.h > +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.h > @@ -94,12 +94,17 @@ struct etna_shader_variant { > /* unknown input property (XX_INPUT_COUNT, field UNK8) */ > uint32_t input_count_unk8; > > + /* shader is larger than GPU instruction limit, thus needs icache */ 
> + bool needs_icache; > + > /* shader variants form a linked list */ > struct etna_shader_variant *next; > > /* replicated here to avoid passing extra ptrs everywhere */ > struct etna_shader *shader; > struct etna_shader_key key; > + > + struct etna_bo *bo; /* cached code memory bo handle (for icache) */ > }; > > struct etna_varying { > diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c > b/src/gallium/drivers/etnaviv/etnaviv_emit.c > index 273b3d0..c2117d5 100644 > --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c > +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c > @@ -421,9 +421,6 @@ etna_emit_state(struct etna_context *ctx) > if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { > /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, > ctx->shader_state.VS_LOAD_BALANCING); > /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC); > - if (ctx->specs.has_shader_range_registers) { > - /*0085C*/ EMIT_STATE(VS_RANGE, (ctx->shader_state.vs_inst_mem_size > / 4 - 1) << 16); > - } > } > if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) { > /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, > ctx->viewport.PA_VIEWPORT_SCALE_X); > @@ -534,10 +531,6 @@ etna_emit_state(struct etna_context *ctx) > : ctx->shader_state.PS_TEMP_REGISTER_CONTROL); > /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL); > /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC); > - if (ctx->specs.has_shader_range_registers) { > - /*0101C*/ EMIT_STATE(PS_RANGE, ((ctx->shader_state.ps_inst_mem_size > / 4 - 1 + 0x100) << 16) | > - 0x100); > - } > } > if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) { > uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG; > @@ -739,14 +732,43 @@ etna_emit_state(struct etna_context *ctx) > if (dirty & (ETNA_DIRTY_SHADER)) { > /* Special case: a new shader was loaded; simply re-load all uniforms > and > * shader code at once */ > - /*04000 or 0C000*/ > - etna_set_state_multi(stream, ctx->specs.vs_offset, > - ctx->shader_state.vs_inst_mem_size, > 
- ctx->shader_state.VS_INST_MEM); > - /*06000 or 0D000*/ > - etna_set_state_multi(stream, ctx->specs.ps_offset, > - ctx->shader_state.ps_inst_mem_size, > - ctx->shader_state.PS_INST_MEM); > + if (ctx->shader_state.VS_INST_ADDR.bo || > ctx->shader_state.PS_INST_ADDR.bo) { > + assert(ctx->specs.has_icache && > ctx->specs.has_shader_range_registers); > + /* Set icache (VS) */ > + etna_set_state(stream, VIVS_VS_RANGE, > (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16); > + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, > + VIVS_VS_ICACHE_CONTROL_ENABLE | > + VIVS_VS_ICACHE_CONTROL_FLUSH_VS); > + assert(ctx->shader_state.VS_INST_ADDR.bo); > + etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, > &ctx->shader_state.VS_INST_ADDR); > + > + /* Set icache (PS) */ > + etna_set_state(stream, VIVS_PS_RANGE, > (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16); > + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, > + VIVS_VS_ICACHE_CONTROL_ENABLE | > + VIVS_VS_ICACHE_CONTROL_FLUSH_PS); > + assert(ctx->shader_state.PS_INST_ADDR.bo); > + etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, > &ctx->shader_state.PS_INST_ADDR); > + } else { > + /* Upload shader directly, first flushing and disabling icache if > + * supported on this hw */ > + if (ctx->specs.has_icache) { > + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, > + VIVS_VS_ICACHE_CONTROL_FLUSH_PS | > + VIVS_VS_ICACHE_CONTROL_FLUSH_VS); > + } > + if (ctx->specs.has_shader_range_registers) { > + etna_set_state(stream, VIVS_VS_RANGE, > (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16); > + etna_set_state(stream, VIVS_PS_RANGE, > ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) | > + 0x100); > + } > + etna_set_state_multi(stream, ctx->specs.vs_offset, > + ctx->shader_state.vs_inst_mem_size, > + ctx->shader_state.VS_INST_MEM); > + etna_set_state_multi(stream, ctx->specs.ps_offset, > + ctx->shader_state.ps_inst_mem_size, > + ctx->shader_state.PS_INST_MEM); > + } > > if (ctx->specs.has_unified_uniforms) { > 
etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0); > diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h > b/src/gallium/drivers/etnaviv/etnaviv_internal.h > index 5c13f23..a6544f6 100644 > --- a/src/gallium/drivers/etnaviv/etnaviv_internal.h > +++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h > @@ -76,6 +76,8 @@ struct etna_specs { > unsigned single_buffer : 1; > /* has unified uniforms memory */ > unsigned has_unified_uniforms : 1; > + /* can load shader instructions from memory */ > + unsigned has_icache : 1; > /* can use any kind of wrapping mode on npot textures */ > unsigned npot_tex_any_wrap; > /* number of bits per TS tile */ > @@ -250,6 +252,8 @@ struct compiled_shader_state { > uint32_t VS_UNIFORMS[ETNA_MAX_UNIFORMS * 4]; > uint32_t *PS_INST_MEM; > uint32_t PS_UNIFORMS[ETNA_MAX_UNIFORMS * 4]; > + struct etna_reloc PS_INST_ADDR; > + struct etna_reloc VS_INST_ADDR; > }; > > /* state of some 3d and common registers relevant to etna driver */ > diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c > b/src/gallium/drivers/etnaviv/etnaviv_screen.c > index 81480e9..4fcbe87 100644 > --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c > +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c > @@ -665,7 +665,8 @@ etna_get_specs(struct etna_screen *screen) > * same. > */ > screen->specs.ps_offset = 0x8000 + 0x1000; > - screen->specs.max_instructions = 256; > + screen->specs.max_instructions = 256; /* maximum number instructions > for non-icache use */ > + screen->specs.has_icache = true; > } else { > if (instruction_count > 256) { /* unified instruction memory? 
*/ > screen->specs.vs_offset = 0xC000; > @@ -676,6 +677,7 @@ etna_get_specs(struct etna_screen *screen) > screen->specs.ps_offset = 0x6000; > screen->specs.max_instructions = instruction_count / 2; > } > + screen->specs.has_icache = false; > } > > if (VIV_FEATURE(screen, chipMinorFeatures1, HALTI0)) { > diff --git a/src/gallium/drivers/etnaviv/etnaviv_shader.c > b/src/gallium/drivers/etnaviv/etnaviv_shader.c > index b5256e4..6012680 100644 > --- a/src/gallium/drivers/etnaviv/etnaviv_shader.c > +++ b/src/gallium/drivers/etnaviv/etnaviv_shader.c > @@ -29,12 +29,30 @@ > #include "etnaviv_compiler.h" > #include "etnaviv_context.h" > #include "etnaviv_debug.h" > +#include "etnaviv_screen.h" > #include "etnaviv_util.h" > > #include "tgsi/tgsi_parse.h" > #include "util/u_math.h" > #include "util/u_memory.h" > > +/* Upload shader code to bo, if not already done */ > +static bool etna_icache_upload_shader(struct etna_context *ctx, struct > etna_shader_variant *v) > +{ > + if (v->bo) > + return true; > + v->bo = etna_bo_new(ctx->screen->dev, v->code_size*4, > DRM_ETNA_GEM_CACHE_UNCACHED); > + if (!v->bo) > + return false; > + > + void *buf = etna_bo_map(v->bo); > + etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE); > + memcpy(buf, v->code, v->code_size*4); > + etna_bo_cpu_fini(v->bo); > + DBG("Uploaded %s of %u words to bo %p", v->processor == > PIPE_SHADER_FRAGMENT ? 
"fs":"vs", v->code_size, v->bo); > + return true; > +} > + > /* Link vs and fs together: fill in shader_state from vs and fs > * as this function is called every time a new fs or vs is bound, the goal > is to > * do little processing as possible here, and to precompute as much as > possible in > @@ -45,7 +63,7 @@ > */ > static bool > etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs, > - const struct etna_shader_variant *vs, const struct > etna_shader_variant *fs) > + struct etna_shader_variant *vs, struct etna_shader_variant > *fs) > { > struct etna_shader_link_info link = { }; > > @@ -164,9 +182,32 @@ etna_link_shaders(struct etna_context *ctx, struct > compiled_shader_state *cs, > /* reference instruction memory */ > cs->vs_inst_mem_size = vs->code_size; > cs->VS_INST_MEM = vs->code; > + > cs->ps_inst_mem_size = fs->code_size; > cs->PS_INST_MEM = fs->code; > > + if (vs->needs_icache | fs->needs_icache) { > + /* If either of the shaders needs ICACHE, we use it for both. It is > + * either switched on or off for the entire shader processor. 
> + */ > + if (!etna_icache_upload_shader(ctx, vs) || > + !etna_icache_upload_shader(ctx, fs)) { > + assert(0); > + return false; > + } > + > + cs->VS_INST_ADDR.bo = vs->bo; > + cs->VS_INST_ADDR.offset = 0; > + cs->VS_INST_ADDR.flags = ETNA_RELOC_READ; > + cs->PS_INST_ADDR.bo = fs->bo; > + cs->PS_INST_ADDR.offset = 0; > + cs->PS_INST_ADDR.flags = ETNA_RELOC_READ; > + } else { > + /* clear relocs */ > + memset(&cs->VS_INST_ADDR, 0, sizeof(cs->VS_INST_ADDR)); > + memset(&cs->PS_INST_ADDR, 0, sizeof(cs->PS_INST_ADDR)); > + } > + > return true; > } > > @@ -352,6 +393,8 @@ etna_delete_shader_state(struct pipe_context *pctx, void > *ss) > while (v) { > t = v; > v = v->next; > + if (t->bo) > + etna_bo_del(t->bo); > etna_destroy_shader(t); > } > > -- > 2.7.4 > greets -- Christian Gmeiner, MSc https://christian-gmeiner.info _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev