Module: Mesa
Branch: main
Commit: 102f6ebc57523c3e6637e8af9fb635c4b8e414c4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=102f6ebc57523c3e6637e8af9fb635c4b8e414c4
Author: Mike Blumenkrantz <[email protected]>
Date:   Fri Mar 11 17:03:30 2022 -0500

llvmpipe: fix subgroup id construction

the coroutine idx is based on the number of x loops, but the subgroup
id is based on the coroutine's total loops

Reviewed-by: Dave Airlie <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15354>

---

 src/gallium/drivers/llvmpipe/lp_state_cs.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.c b/src/gallium/drivers/llvmpipe/lp_state_cs.c
index 61213175f67..7feda086c47 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_cs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_cs.c
@@ -372,7 +372,25 @@ generate_compute(struct llvmpipe_context *lp,
 
       system_values.work_dim = work_dim_arg;
 
-      system_values.subgroup_id = coro_idx;
+      /* subgroup_id = ((z * block_size_x * block_size_y) + (y * block_size_x) + x) / subgroup_size
+       *
+       * this breaks if z or y is zero, so distribute the division to preserve ids
+       *
+       * subgroup_id = ((z * block_size_x * block_size_y) / subgroup_size) + ((y * block_size_x) / subgroup_size) + (x / subgroup_size)
+       *
+       * except "x" is pre-divided here
+       *
+       * subgroup_id = ((z * block_size_x * block_size_y) / subgroup_size) + ((y * block_size_x) / subgroup_size) + x
+       */
+      LLVMValueRef subgroup_id = LLVMBuildUDiv(builder,
+                                               LLVMBuildMul(gallivm->builder, z_size_arg, LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, ""), ""),
+                                               vec_length, "");
+      subgroup_id = LLVMBuildAdd(gallivm->builder,
+                                 subgroup_id,
+                                 LLVMBuildUDiv(builder, LLVMBuildMul(gallivm->builder, y_size_arg, block_x_size_arg, ""), vec_length, ""),
+                                 "");
+      subgroup_id = LLVMBuildAdd(gallivm->builder, subgroup_id, x_size_arg, "");
+      system_values.subgroup_id = subgroup_id;
       LLVMValueRef num_subgroups = LLVMBuildUDiv(builder,
                                                  LLVMBuildMul(builder, block_x_size_arg, LLVMBuildMul(builder, block_y_size_arg, block_z_size_arg, ""), ""),
