Module: Mesa
Branch: master
Commit: ce36e60b18b30a1496b308e8f2ce6bda57b8699b
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ce36e60b18b30a1496b308e8f2ce6bda57b8699b
Author: Eric Anholt <[email protected]> Date: Thu Mar 18 13:25:45 2021 -0700 freedreno/a4xx: Switch to using ir3_cache for looking up our VS/FS Saves the lock/unlock to get the variants for VS/BS/FS programs, and gives us a place we could hang future linked program state. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9698> --- src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 29 ++++++++++++------- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 2 +- src/gallium/drivers/freedreno/a4xx/fd4_emit.h | 13 ++++----- src/gallium/drivers/freedreno/a4xx/fd4_gmem.c | 31 ++++++++++++++------ src/gallium/drivers/freedreno/a4xx/fd4_program.c | 37 ++++++++++++++++++++++++ src/gallium/drivers/freedreno/a4xx/fd4_program.h | 14 +++++++++ 6 files changed, 97 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index d4b03cc3970..7b7c8730f9f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -83,16 +83,19 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, struct fd4_emit emit = { .debug = &ctx->debug, .vtx = &ctx->vtx, - .prog = &ctx->prog, .info = info, .indirect = indirect, .draw = draw, .key = { - .rasterflat = ctx->rasterizer->flatshade, - .ucp_enables = ctx->rasterizer->clip_plane_enable, - .has_per_samp = fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb, - .vastc_srgb = fd4_ctx->vastc_srgb, - .fastc_srgb = fd4_ctx->fastc_srgb, + .vs = ctx->prog.vs, + .fs = ctx->prog.fs, + .key = { + .rasterflat = ctx->rasterizer->flatshade, + .ucp_enables = ctx->rasterizer->clip_plane_enable, + .has_per_samp = fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb, + .vastc_srgb = fd4_ctx->vastc_srgb, + .fastc_srgb = fd4_ctx->fastc_srgb, + }, }, .rasterflat = ctx->rasterizer->flatshade, .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, @@ -105,17 +108,21 @@ fd4_draw_vbo(struct fd_context *ctx, const struct 
pipe_draw_info *info, !u_trim_pipe_prim(info->mode, (unsigned*)&draw->count)) return false; - ir3_fixup_shader_state(&ctx->base, &emit.key); + ir3_fixup_shader_state(&ctx->base, &emit.key.key); enum fd_dirty_3d_state dirty = ctx->dirty; - const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit); - const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit); - /* do regular pass first, since that is more likely to fail compiling: */ + emit.prog = fd4_program_state(ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug)); - if (!vp || !fp) + /* bail if compile failed: */ + if (!emit.prog) return false; + const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit); + const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit); + + /* do regular pass first: */ + if (unlikely(ctx->stats_users > 0)) { ctx->stats.vs_regs += ir3_shader_halfregs(vp); ctx->stats.fs_regs += ir3_shader_halfregs(fp); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index fbad703b14d..dee49205bd5 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -699,7 +699,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd4_program_emit(ring, emit, n, pfb->cbufs); } - if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ + if (!emit->skip_consts) { /* evil hack to deal sanely with clear path */ ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw); if (!emit->binning_pass) ir3_emit_fs_consts(fp, ring, ctx); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index 2ff413b2a2d..64b62bccb66 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -43,18 +43,19 @@ void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct fd4_emit { struct pipe_debug_callback *debug; const struct fd_vertex_state 
*vtx; - const struct fd_program_stateobj *prog; + const struct fd4_program_state *prog; const struct pipe_draw_info *info; const struct pipe_draw_indirect_info *indirect; const struct pipe_draw_start_count *draw; bool binning_pass; - struct ir3_shader_key key; + struct ir3_cache_key key; enum fd_dirty_3d_state dirty; uint32_t sprite_coord_enable; /* bitmask */ bool sprite_coord_mode; bool rasterflat; bool no_decode_srgb; + bool skip_consts; /* cached to avoid repeated lookups of same variants: */ const struct ir3_shader_variant *vs, *fs; @@ -72,9 +73,7 @@ static inline const struct ir3_shader_variant * fd4_emit_get_vp(struct fd4_emit *emit) { if (!emit->vs) { - struct ir3_shader *shader = ir3_get_shader(emit->prog->vs); - emit->vs = ir3_shader_variant(shader, emit->key, - emit->binning_pass, emit->debug); + emit->vs = emit->binning_pass ? emit->prog->bs : emit->prog->vs; } return emit->vs; } @@ -88,9 +87,7 @@ fd4_emit_get_fp(struct fd4_emit *emit) static const struct ir3_shader_variant binning_fs = {}; emit->fs = &binning_fs; } else { - struct ir3_shader *shader = ir3_get_shader(emit->prog->fs); - emit->fs = ir3_shader_variant(shader, emit->key, - false, emit->debug); + emit->fs = emit->prog->fs; } } return emit->fs; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index b39fa795c19..32fff9f0958 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -42,6 +42,18 @@ #include "fd4_format.h" #include "fd4_zsa.h" +static void +fd4_gmem_emit_set_prog(struct fd_context *ctx, struct fd4_emit *emit, struct fd_program_stateobj *prog) +{ + emit->skip_consts = true; + emit->key.vs = prog->vs; + emit->key.fs = prog->fs; + emit->prog = fd4_program_state(ir3_cache_lookup(ctx->shader_cache, &emit->key, &ctx->debug)); + /* reset the fd4_emit_get_*p cache */ + emit->vs = NULL; + emit->fs = NULL; +} + static void emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, 
struct pipe_surface **bufs, const uint32_t *bases, @@ -194,8 +206,8 @@ fd4_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile) struct fd4_emit emit = { .debug = &ctx->debug, .vtx = &ctx->solid_vbuf_state, - .prog = &ctx->solid_prog, }; + fd4_gmem_emit_set_prog(ctx, &emit, &ctx->solid_prog); OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); @@ -331,10 +343,11 @@ fd4_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile) .debug = &ctx->debug, .vtx = &ctx->blit_vbuf_state, .sprite_coord_enable = 1, - /* NOTE: They all use the same VP, this is for vtx bufs. */ - .prog = &ctx->blit_prog[0], .no_decode_srgb = true, }; + /* NOTE: They all use the same VP, this is for vtx bufs. */ + fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]); + unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; float x0, y0, x1, y1; unsigned bin_w = tile->bin_w; @@ -451,8 +464,7 @@ fd4_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile) bin_h = gmem->bin_h; if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { - emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; - emit.fs = NULL; /* frag shader changed so clear cache */ + fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[pfb->nr_cbufs - 1]); fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs); emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); } @@ -461,8 +473,10 @@ fd4_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile) switch (pfb->zsbuf->format) { case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: case PIPE_FORMAT_Z32_FLOAT: - emit.prog = (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) ? 
- &ctx->blit_z : &ctx->blit_zs; + if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) + fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_z); + else + fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_zs); OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE | @@ -481,10 +495,9 @@ fd4_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile) /* Non-float can use a regular color write. It's split over 8-bit * components, so half precision is always sufficient. */ - emit.prog = &ctx->blit_prog[0]; + fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]); break; } - emit.fs = NULL; /* frag shader changed so clear cache */ fd4_program_emit(ring, &emit, 1, &pfb->zsbuf); emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index f5f8ef26099..62e0d4c8001 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -534,9 +534,46 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, emit_shader(ring, s[FS].v); } +static struct ir3_program_state * +fd4_program_create(void *data, struct ir3_shader_variant *bs, + struct ir3_shader_variant *vs, + struct ir3_shader_variant *hs, + struct ir3_shader_variant *ds, + struct ir3_shader_variant *gs, + struct ir3_shader_variant *fs, + const struct ir3_shader_key *key) + in_dt +{ + struct fd_context *ctx = fd_context(data); + struct fd4_program_state *state = CALLOC_STRUCT(fd4_program_state); + + tc_assert_driver_thread(ctx->tc); + + state->bs = bs; + state->vs = vs; + state->fs = fs; + + return &state->base; +} + +static void +fd4_program_destroy(void *data, struct ir3_program_state *state) +{ + struct fd4_program_state *so = fd4_program_state(state); + free(so); +} + +static const struct ir3_cache_funcs cache_funcs = { + .create_state = fd4_program_create, + .destroy_state = 
fd4_program_destroy, +}; + void fd4_prog_init(struct pipe_context *pctx) { + struct fd_context *ctx = fd_context(pctx); + + ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx); ir3_prog_init(pctx); fd_prog_init(pctx); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.h b/src/gallium/drivers/freedreno/a4xx/fd4_program.h index a0a0bec264f..790adc9b7fd 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.h @@ -30,10 +30,24 @@ #include "pipe/p_context.h" #include "freedreno_context.h" +#include "ir3/ir3_cache.h" #include "ir3/ir3_shader.h" struct fd4_emit; +struct fd4_program_state { + struct ir3_program_state base; + struct ir3_shader_variant *bs; /* VS for when emit->binning */ + struct ir3_shader_variant *vs; + struct ir3_shader_variant *fs; /* FS for when !emit->binning */ +}; + +static inline struct fd4_program_state * +fd4_program_state(struct ir3_program_state *state) +{ + return (struct fd4_program_state *)state; +} + void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr, struct pipe_surface **bufs); _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
