Module: Mesa Branch: master Commit: 5d14562da86f1f3ee1a747183d8bac183ce75fd9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d14562da86f1f3ee1a747183d8bac183ce75fd9
Author: Indrajit Kumar Das <[email protected]> Date: Fri Oct 16 10:27:02 2020 +0530 radeonsi/gfx10: fix overflow and primitive queries This aligns the offsets to match the memory layout of the query buffer defined by gfx10_sh_query_buffer_mem and calls si_launch_grid_internal to flush caches and wait for completion of shaders prior to retrieving results. Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]> Reviewed-by: Marek Olšák <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7181> --- src/gallium/drivers/radeonsi/gfx10_query.c | 11 ++++++----- src/gallium/drivers/radeonsi/si_compute_blit.c | 10 ++++------ src/gallium/drivers/radeonsi/si_pipe.h | 7 +++++++ src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c | 23 +++++++++++------------ 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_query.c b/src/gallium/drivers/radeonsi/gfx10_query.c index 18823a7c929..cb541f06bef 100644 --- a/src/gallium/drivers/radeonsi/gfx10_query.c +++ b/src/gallium/drivers/radeonsi/gfx10_query.c @@ -360,11 +360,11 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s if (index >= 0) { switch (query->b.type) { case PIPE_QUERY_PRIMITIVES_GENERATED: - consts.offset = sizeof(uint32_t) * query->stream; + consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t); consts.config = 0; break; case PIPE_QUERY_PRIMITIVES_EMITTED: - consts.offset = sizeof(uint32_t) * (4 + query->stream); + consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t); consts.config = 0; break; case PIPE_QUERY_SO_STATISTICS: @@ -372,7 +372,7 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s consts.config = 0; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - consts.offset = sizeof(uint32_t) * query->stream; + consts.offset = 4 * sizeof(uint64_t) * query->stream; consts.config = 2; break; case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: @@ -454,8 +454,9 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0); } - sctx->b.launch_grid(&sctx->b, &grid); - sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; + void *saved_cs = sctx->cs_shader_state.program; + si_launch_grid_internal((struct si_context *)&sctx->b, &grid, saved_cs, + SI_CS_WAIT_FOR_IDLE | SI_CS_PARTIAL_FLUSH_DISABLE); if (qbuf == query->last) break; diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index a66968c167d..62d022c394d 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -60,15 +60,13 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher, } } -#define SI_CS_IMAGE_OP (1 << 0) -#define SI_CS_WAIT_FOR_IDLE (1 << 1) -#define SI_CS_RENDER_COND_ENABLE (1 << 2) - -static void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info, +void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info, void *restore_cs, unsigned flags) { /* Wait for previous shaders to finish. */ - sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH; + sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; + if (!(flags & SI_CS_PARTIAL_FLUSH_DISABLE)) + sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; /* Invalidate L0-L1 caches. */ /* sL0 is never invalidated, because src resources don't use it. */ sctx->flags |= SI_CONTEXT_INV_VCACHE; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e7d575308fd..20230556744 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1339,8 +1339,15 @@ bool vi_dcc_clear_level(struct si_context *sctx, struct si_texture *tex, unsigne void si_init_clear_functions(struct si_context *sctx); /* si_compute_blit.c */ +#define SI_CS_IMAGE_OP (1 << 0) +#define SI_CS_WAIT_FOR_IDLE (1 << 1) +#define SI_CS_RENDER_COND_ENABLE (1 << 2) +#define SI_CS_PARTIAL_FLUSH_DISABLE (1 << 3) + unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher, enum si_cache_policy cache_policy); +void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info, + void *restore_cs, unsigned flags); void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, uint32_t *clear_value, uint32_t clear_value_size, enum si_coherency coher, bool force_cpdma); diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c index c1a150d6ab3..68e2e041acf 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c @@ -816,7 +816,7 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx) "DCL BUFFER[2]\n" "DCL CONST[0][0..0]\n" "DCL TEMP[0..5]\n" - "IMM[0] UINT32 {0, 7, 0, 4294967295}\n" + "IMM[0] UINT32 {0, 7, 256, 4294967295}\n" "IMM[1] UINT32 {1, 2, 4, 8}\n" "IMM[2] UINT32 {16, 32, 64, 128}\n" @@ -855,13 +855,13 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx) "UADD TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww\n" /* - fence = buffer[0]@(base_offset + 32); + fence = buffer[0]@(base_offset + sizeof(gfx10_sh_query_buffer_mem.stream)); if (!fence) { acc_missing = ~0u; break; } */ - "UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy\n" + "UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].wwww\n" "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n" "USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n" "UIF TEMP[5]\n" @@ -897,22 +897,21 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx) /* do { - generated = buffer[0]@stream_offset; - emitted = buffer[0]@(stream_offset + 16); + generated = buffer[0]@(stream_offset + 2 * sizeof(uint64_t)); + emitted = buffer[0]@(stream_offset + 3 * sizeof(uint64_t)); if (generated != emitted) { acc_result = 1; result_remaining = 0; break; } - stream_offset += 4; + stream_offset += sizeof(gfx10_sh_query_buffer_mem.stream[0]); } while (--count); */ "BGNLOOP\n" "UADD TEMP[5].x, TEMP[2].xxxx, IMM[2].xxxx\n" - "LOAD TEMP[4].x, BUFFER[0], TEMP[2].xxxx\n" - "LOAD TEMP[4].y, BUFFER[0], TEMP[5].xxxx\n" - "USNE TEMP[5], TEMP[4].xxxx, TEMP[4].yyyy\n" + "LOAD TEMP[4].xyzw, BUFFER[0], TEMP[5].xxxx\n" + "USNE TEMP[5], TEMP[4].xyxy, TEMP[4].zwzw\n" "UIF TEMP[5]\n" "MOV TEMP[0].x, IMM[1].xxxx\n" "MOV TEMP[1].y, IMM[0].xxxx\n" @@ -924,15 +923,15 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx) "UIF TEMP[5]\n" "BRK\n" "ENDIF\n" - "UADD TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz\n" + "UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy\n" "ENDLOOP\n" "ENDIF\n" /* - base_offset += 64; + base_offset += sizeof(gfx10_sh_query_buffer_mem); } // end outer loop */ - "UADD TEMP[1].y, TEMP[1].yyyy, IMM[2].zzzz\n" + "UADD TEMP[1].y, TEMP[1].yyyy, IMM[0].zzzz\n" "ENDLOOP\n" /* _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
