Module: Mesa
Branch: master
Commit: 4256f7ed5847505c30e903b6674dac88c5d03315
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4256f7ed5847505c30e903b6674dac88c5d03315
Author: Kenneth Graunke <[email protected]> Date: Wed Feb 3 01:41:42 2021 -0800 iris: Fill out scratch base address dynamically Now that shaders are shared between contexts, we can't pre-bake the shader scratch address into the derived 3DSTATE_XS packets. Scratch buffers are and must be per-context, as multiple contexts could be executing shaders using scratch at the same time. So instead, we leave that field blank when pre-filling those packets up-front, and merge in the actual address when emitting them. It's a little more overhead, but only in the case where scratch is used. Fixes: 84a38ec1336 ("iris: Enable PIPE_CAP_SHAREABLE_SHADERS.") Reviewed-by: Anuj Phogat <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8922> --- src/gallium/drivers/iris/iris_program_cache.c | 3 +- src/gallium/drivers/iris/iris_screen.h | 2 +- src/gallium/drivers/iris/iris_state.c | 81 +++++++++++++-------------- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/gallium/drivers/iris/iris_program_cache.c b/src/gallium/drivers/iris/iris_program_cache.c index adb0b38c2e0..41845967695 100644 --- a/src/gallium/drivers/iris/iris_program_cache.c +++ b/src/gallium/drivers/iris/iris_program_cache.c @@ -124,6 +124,7 @@ iris_upload_shader(struct iris_context *ice, struct hash_table *cache = ice->shaders.cache; void *mem_ctx = ish ? NULL : (void *) cache; struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; + const struct gen_device_info *devinfo = &screen->devinfo; struct iris_compiled_shader *shader = rzalloc_size(mem_ctx, sizeof(struct iris_compiled_shader) + screen->vtbl.derived_program_state_size(cache_id)); @@ -170,7 +171,7 @@ iris_upload_shader(struct iris_context *ice, ralloc_steal(shader, shader->system_values); /* Store the 3DSTATE shader packets and other derived state. 
*/ - screen->vtbl.store_derived_program_state(ice, cache_id, shader); + screen->vtbl.store_derived_program_state(devinfo, cache_id, shader); if (ish) { assert(key_size <= sizeof(union iris_any_prog_key)); diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h index b9306c2d176..07aaf5dbe7b 100644 --- a/src/gallium/drivers/iris/iris_screen.h +++ b/src/gallium/drivers/iris/iris_screen.h @@ -110,7 +110,7 @@ struct iris_vtable { uint32_t report_id); unsigned (*derived_program_state_size)(enum iris_program_cache_id id); - void (*store_derived_program_state)(struct iris_context *ice, + void (*store_derived_program_state)(const struct gen_device_info *devinfo, enum iris_program_cache_id cache_id, struct iris_compiled_shader *shader); uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol, diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 3fc172dca4a..553490f7b3f 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -4307,20 +4307,24 @@ KSP(const struct iris_compiled_shader *shader) pkt.Enable = true; \ \ if (prog_data->total_scratch) { \ - struct iris_bo *bo = \ - iris_get_scratch_space(ice, prog_data->total_scratch, stage); \ - uint32_t scratch_addr = bo->gtt_offset; \ pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \ - pkt.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr, \ - IRIS_DOMAIN_NONE); \ } +#define MERGE_SCRATCH_ADDR(name) \ +{ \ + uint32_t pkt2[GENX(name##_length)] = {0}; \ + _iris_pack_command(batch, GENX(name), pkt2, p) { \ + p.ScratchSpaceBasePointer = rw_bo(scratch_bo, 0, IRIS_DOMAIN_NONE); \ + } \ + iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \ +} + + /** * Encode most of 3DSTATE_VS based on the compiled shader. 
*/ static void -iris_store_vs_state(struct iris_context *ice, - const struct gen_device_info *devinfo, +iris_store_vs_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; @@ -4339,8 +4343,7 @@ iris_store_vs_state(struct iris_context *ice, * Encode most of 3DSTATE_HS based on the compiled shader. */ static void -iris_store_tcs_state(struct iris_context *ice, - const struct gen_device_info *devinfo, +iris_store_tcs_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; @@ -4384,8 +4387,7 @@ iris_store_tcs_state(struct iris_context *ice, * Encode 3DSTATE_TE and most of 3DSTATE_DS based on the compiled shader. */ static void -iris_store_tes_state(struct iris_context *ice, - const struct gen_device_info *devinfo, +iris_store_tes_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; @@ -4422,8 +4424,7 @@ iris_store_tes_state(struct iris_context *ice, * Encode most of 3DSTATE_GS based on the compiled shader. */ static void -iris_store_gs_state(struct iris_context *ice, - const struct gen_device_info *devinfo, +iris_store_gs_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; @@ -4470,8 +4471,7 @@ iris_store_gs_state(struct iris_context *ice, * Encode most of 3DSTATE_PS and 3DSTATE_PS_EXTRA based on the shader. */ static void -iris_store_fs_state(struct iris_context *ice, - const struct gen_device_info *devinfo, +iris_store_fs_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; @@ -4504,15 +4504,8 @@ iris_store_fs_state(struct iris_context *ice, ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
POSOFFSET_SAMPLE : POSOFFSET_NONE; - if (prog_data->total_scratch) { - struct iris_bo *bo = - iris_get_scratch_space(ice, prog_data->total_scratch, - MESA_SHADER_FRAGMENT); - uint32_t scratch_addr = bo->gtt_offset; + if (prog_data->total_scratch) ps.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; - ps.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr, - IRIS_DOMAIN_NONE); - } } iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) { @@ -4538,8 +4531,7 @@ iris_store_fs_state(struct iris_context *ice, * This must match the data written by the iris_store_xs_state() functions. */ static void -iris_store_cs_state(struct iris_context *ice, - const struct gen_device_info *devinfo, +iris_store_cs_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) { struct brw_cs_prog_data *cs_prog_data = (void *) shader->prog_data; @@ -4596,31 +4588,28 @@ iris_derived_program_state_size(enum iris_program_cache_id cache_id) * get most of the state packet without having to reconstruct it. 
*/ static void -iris_store_derived_program_state(struct iris_context *ice, +iris_store_derived_program_state(const struct gen_device_info *devinfo, enum iris_program_cache_id cache_id, struct iris_compiled_shader *shader) { - struct iris_screen *screen = (void *) ice->ctx.screen; - const struct gen_device_info *devinfo = &screen->devinfo; - switch (cache_id) { case IRIS_CACHE_VS: - iris_store_vs_state(ice, devinfo, shader); + iris_store_vs_state(devinfo, shader); break; case IRIS_CACHE_TCS: - iris_store_tcs_state(ice, devinfo, shader); + iris_store_tcs_state(devinfo, shader); break; case IRIS_CACHE_TES: - iris_store_tes_state(ice, devinfo, shader); + iris_store_tes_state(devinfo, shader); break; case IRIS_CACHE_GS: - iris_store_gs_state(ice, devinfo, shader); + iris_store_gs_state(devinfo, shader); break; case IRIS_CACHE_FS: - iris_store_fs_state(ice, devinfo, shader); + iris_store_fs_state(devinfo, shader); break; case IRIS_CACHE_CS: - iris_store_cs_state(ice, devinfo, shader); + iris_store_cs_state(devinfo, shader); case IRIS_CACHE_BLORP: break; default: @@ -5855,18 +5844,15 @@ iris_upload_dirty_render_state(struct iris_context *ice, struct iris_resource *cache = (void *) shader->assembly.res; iris_use_pinned_bo(batch, cache->bo, false, IRIS_DOMAIN_NONE); - if (prog_data->total_scratch > 0) { - struct iris_bo *bo = - iris_get_scratch_space(ice, prog_data->total_scratch, stage); - iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE); - } + struct iris_bo *scratch_bo = prog_data->total_scratch == 0 ? 
NULL : + iris_get_scratch_space(ice, prog_data->total_scratch, stage); if (stage == MESA_SHADER_FRAGMENT) { UNUSED struct iris_rasterizer_state *cso = ice->state.cso_rast; struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0}; - iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) { + _iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) { ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; @@ -5898,6 +5884,11 @@ iris_upload_dirty_render_state(struct iris_context *ice, brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); ps.KernelStartPointer2 = KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); + + if (scratch_bo) { + ps.ScratchSpaceBasePointer = + rw_bo(scratch_bo, 0, IRIS_DOMAIN_NONE); + } } uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0}; @@ -5924,6 +5915,14 @@ iris_upload_dirty_render_state(struct iris_context *ice, GENX(3DSTATE_PS_length)); iris_emit_merge(batch, shader_psx, psx_state, GENX(3DSTATE_PS_EXTRA_length)); + } else if (scratch_bo) { + uint32_t *pkt = (uint32_t *) shader->derived_data; + switch (stage) { + case MESA_SHADER_VERTEX: MERGE_SCRATCH_ADDR(3DSTATE_VS); break; + case MESA_SHADER_TESS_CTRL: MERGE_SCRATCH_ADDR(3DSTATE_HS); break; + case MESA_SHADER_TESS_EVAL: MERGE_SCRATCH_ADDR(3DSTATE_DS); break; + case MESA_SHADER_GEOMETRY: MERGE_SCRATCH_ADDR(3DSTATE_GS); break; + } } else { iris_batch_emit(batch, shader->derived_data, iris_derived_program_state_size(stage)); _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
