Re: [Mesa-dev] [PATCH v2] radv: fix passing clip/cull distances from VS to PS
okay, r-b with that. On Thu, Aug 30, 2018 at 1:39 PM Samuel Pitoiset wrote: > > > > On 8/30/18 12:30 PM, Bas Nieuwenhuizen wrote: > > On Wed, Aug 29, 2018 at 10:55 PM Samuel Pitoiset > > wrote: > >> > >> CTS doesn't test input clip/cull distances for the fragment > >> shader stage, which explains why this was totally broken. I > >> wrote a simple test locally that works now. > >> > >> This fixes a crash with GTA V and DXVK. > >> > >> Note that we are exporting unused parameters from the vertex > >> shader now, but this can't be optimized easily because we don't > >> keep the fragment shader info... > >> > >> Cc: mesa-sta...@lists.freedesktop.org > >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477 > >> Signed-off-by: Samuel Pitoiset > >> --- > >> src/amd/vulkan/radv_nir_to_llvm.c | 30 +- > >> src/amd/vulkan/radv_pipeline.c| 16 > >> src/amd/vulkan/radv_shader.h | 1 + > >> src/amd/vulkan/radv_shader_info.c | 4 > >> 4 files changed, 50 insertions(+), 1 deletion(-) > >> > >> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > >> b/src/amd/vulkan/radv_nir_to_llvm.c > >> index 4940e3230f..d7cd8cc069 100644 > >> --- a/src/amd/vulkan/radv_nir_to_llvm.c > >> +++ b/src/amd/vulkan/radv_nir_to_llvm.c > >> @@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context > >> *ctx, > >> int idx = variable->data.location; > >> unsigned attrib_count = > >> glsl_count_attribute_slots(variable->type, false); > >> LLVMValueRef interp = NULL; > >> + uint64_t mask; > >> > >> variable->data.driver_location = idx * 4; > >> - ctx->input_mask |= ((1ull << attrib_count) - 1) << > >> variable->data.location; > >> + mask = ((1ull << attrib_count) - 1) << variable->data.location; > >> > >> if (glsl_get_base_type(glsl_without_array(variable->type)) == > >> GLSL_TYPE_FLOAT) { > >> unsigned interp_type; > >> @@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context > >> *ctx, > >> for (unsigned i = 0; i < attrib_count; ++i) > >> ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp; > >> > >> + if (idx == VARYING_SLOT_CLIP_DIST0) { > >> + /* Do not account for the number of components inside the > >> array > >> +* of clip/cull distances because this might wrongly set > >> other > >> +* bits like primitive ID or layer. > >> +*/ > >> + mask = 1ull << VARYING_SLOT_CLIP_DIST0; > >> + } > >> + > >> + ctx->input_mask |= mask; > >> } > >> > >> static void > >> @@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx, > >> if (LLVMIsUndef(interp_param)) > >> ctx->shader_info->fs.flat_shaded_mask |= > >> 1u << index; > >> ++index; > >> + } else if (i == VARYING_SLOT_CLIP_DIST0) { > >> + int length = > >> ctx->shader_info->info.ps.num_input_clips_culls; > >> + > >> + for (unsigned j = 0; j < length; j += 4) { > >> + inputs = ctx->inputs + > >> ac_llvm_reg_index_soa(i, j); > >> + > >> + interp_param = *inputs; > >> + interp_fs_input(ctx, index, interp_param, > >> + ctx->abi.prim_mask, > >> inputs); > >> + ++index; > >> + } > >> } else if (i == VARYING_SLOT_POS) { > >> for(int i = 0; i < 3; ++i) > >> inputs[i] = ctx->abi.frag_pos[i]; > >> @@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context > >> *ctx, > >> memcpy(_args[target - V_008DFC_SQ_EXP_POS], > >> , sizeof(args)); > >> > >> + /* Export the clip/cull distances values to the next > >> stage. */ > >> + radv_export_param(ctx, param_count, [0], 0xf); > >> + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = > >> param_count++; > >> + if (ctx->num_output_clips + ctx->num_output_culls > 4) { > >> + radv_export_param(ctx, param_count, [4], > >> 0xf); > >> + > >> outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++; > >> + } > >> } > >> > >> LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, > >> ctx->ac.f32_0, ctx->ac.f32_1}; > >> diff --git a/src/amd/vulkan/radv_pipeline.c > >> b/src/amd/vulkan/radv_pipeline.c > >> index e63c481d1e..0303642d7e 100644 > >> --- a/src/amd/vulkan/radv_pipeline.c > >> +++ b/src/amd/vulkan/radv_pipeline.c > >> @@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct > >> radeon_cmdbuf *cs, > >>
Re: [Mesa-dev] [PATCH v2] radv: fix passing clip/cull distances from VS to PS
On 8/30/18 12:30 PM, Bas Nieuwenhuizen wrote: On Wed, Aug 29, 2018 at 10:55 PM Samuel Pitoiset wrote: CTS doesn't test input clip/cull distances for the fragment shader stage, which explains why this was totally broken. I wrote a simple test locally that works now. This fixes a crash with GTA V and DXVK. Note that we are exporting unused parameters from the vertex shader now, but this can't be optimized easily because we don't keep the fragment shader info... Cc: mesa-sta...@lists.freedesktop.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477 Signed-off-by: Samuel Pitoiset --- src/amd/vulkan/radv_nir_to_llvm.c | 30 +- src/amd/vulkan/radv_pipeline.c| 16 src/amd/vulkan/radv_shader.h | 1 + src/amd/vulkan/radv_shader_info.c | 4 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 4940e3230f..d7cd8cc069 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx, int idx = variable->data.location; unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); LLVMValueRef interp = NULL; + uint64_t mask; variable->data.driver_location = idx * 4; - ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location; + mask = ((1ull << attrib_count) - 1) << variable->data.location; if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { unsigned interp_type; @@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx, for (unsigned i = 0; i < attrib_count; ++i) ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp; + if (idx == VARYING_SLOT_CLIP_DIST0) { + /* Do not account for the number of components inside the array +* of clip/cull distances because this might wrongly set other +* bits like primitive ID or layer. +*/ + mask = 1ull << VARYING_SLOT_CLIP_DIST0; + } + + ctx->input_mask |= mask; } static void @@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx, if (LLVMIsUndef(interp_param)) ctx->shader_info->fs.flat_shaded_mask |= 1u << index; ++index; + } else if (i == VARYING_SLOT_CLIP_DIST0) { + int length = ctx->shader_info->info.ps.num_input_clips_culls; + + for (unsigned j = 0; j < length; j += 4) { + inputs = ctx->inputs + ac_llvm_reg_index_soa(i, j); + + interp_param = *inputs; + interp_fs_input(ctx, index, interp_param, + ctx->abi.prim_mask, inputs); + ++index; + } } else if (i == VARYING_SLOT_POS) { for(int i = 0; i < 3; ++i) inputs[i] = ctx->abi.frag_pos[i]; @@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, memcpy(_args[target - V_008DFC_SQ_EXP_POS], , sizeof(args)); + /* Export the clip/cull distances values to the next stage. */ + radv_export_param(ctx, param_count, [0], 0xf); + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++; + if (ctx->num_output_clips + ctx->num_output_culls > 4) { + radv_export_param(ctx, param_count, [4], 0xf); + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++; + } } LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1}; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index e63c481d1e..0303642d7e 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs, ps_offset++; } + if (ps->info.info.ps.num_input_clips_culls) { + unsigned vs_offset; + + vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; + if (vs_offset != AC_EXP_PARAM_UNDEFINED) { + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ++ps_offset; + } + + vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1]; + if (vs_offset != AC_EXP_PARAM_UNDEFINED) { && ps->info.info.ps.num_input_clips_culls > 4 ? Otherwise ps_offset can be
Re: [Mesa-dev] [PATCH v2] radv: fix passing clip/cull distances from VS to PS
On Wed, Aug 29, 2018 at 10:55 PM Samuel Pitoiset wrote: > > CTS doesn't test input clip/cull distances for the fragment > shader stage, which explains why this was totally broken. I > wrote a simple test locally that works now. > > This fixes a crash with GTA V and DXVK. > > Note that we are exporting unused parameters from the vertex > shader now, but this can't be optimized easily because we don't > keep the fragment shader info... > > Cc: mesa-sta...@lists.freedesktop.org > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477 > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/radv_nir_to_llvm.c | 30 +- > src/amd/vulkan/radv_pipeline.c| 16 > src/amd/vulkan/radv_shader.h | 1 + > src/amd/vulkan/radv_shader_info.c | 4 > 4 files changed, 50 insertions(+), 1 deletion(-) > > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > b/src/amd/vulkan/radv_nir_to_llvm.c > index 4940e3230f..d7cd8cc069 100644 > --- a/src/amd/vulkan/radv_nir_to_llvm.c > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > @@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx, > int idx = variable->data.location; > unsigned attrib_count = glsl_count_attribute_slots(variable->type, > false); > LLVMValueRef interp = NULL; > + uint64_t mask; > > variable->data.driver_location = idx * 4; > - ctx->input_mask |= ((1ull << attrib_count) - 1) << > variable->data.location; > + mask = ((1ull << attrib_count) - 1) << variable->data.location; > > if (glsl_get_base_type(glsl_without_array(variable->type)) == > GLSL_TYPE_FLOAT) { > unsigned interp_type; > @@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx, > for (unsigned i = 0; i < attrib_count; ++i) > ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp; > > + if (idx == VARYING_SLOT_CLIP_DIST0) { > + /* Do not account for the number of components inside the > array > +* of clip/cull distances because this might wrongly set other > +* bits like primitive ID or layer. > +*/ > + mask = 1ull << VARYING_SLOT_CLIP_DIST0; > + } > + > + ctx->input_mask |= mask; > } > > static void > @@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx, > if (LLVMIsUndef(interp_param)) > ctx->shader_info->fs.flat_shaded_mask |= 1u > << index; > ++index; > + } else if (i == VARYING_SLOT_CLIP_DIST0) { > + int length = > ctx->shader_info->info.ps.num_input_clips_culls; > + > + for (unsigned j = 0; j < length; j += 4) { > + inputs = ctx->inputs + > ac_llvm_reg_index_soa(i, j); > + > + interp_param = *inputs; > + interp_fs_input(ctx, index, interp_param, > + ctx->abi.prim_mask, inputs); > + ++index; > + } > } else if (i == VARYING_SLOT_POS) { > for(int i = 0; i < 3; ++i) > inputs[i] = ctx->abi.frag_pos[i]; > @@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, > memcpy(_args[target - V_008DFC_SQ_EXP_POS], >, sizeof(args)); > > + /* Export the clip/cull distances values to the next stage. */ > + radv_export_param(ctx, param_count, [0], 0xf); > + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = > param_count++; > + if (ctx->num_output_clips + ctx->num_output_culls > 4) { > + radv_export_param(ctx, param_count, [4], 0xf); > + > outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++; > + } > } > > LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, > ctx->ac.f32_0, ctx->ac.f32_1}; > diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c > index e63c481d1e..0303642d7e 100644 > --- a/src/amd/vulkan/radv_pipeline.c > +++ b/src/amd/vulkan/radv_pipeline.c > @@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf > *cs, > ps_offset++; > } > > + if (ps->info.info.ps.num_input_clips_culls) { > + unsigned vs_offset; > + > + vs_offset = > outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; > + if (vs_offset != AC_EXP_PARAM_UNDEFINED) { > + ps_input_cntl[ps_offset] = > offset_to_ps_input(vs_offset, true); > + ++ps_offset; > + } > + > + vs_offset = >
[Mesa-dev] [PATCH v2] radv: fix passing clip/cull distances from VS to PS
CTS doesn't test input clip/cull distances for the fragment shader stage, which explains why this was totally broken. I wrote a simple test locally that works now. This fixes a crash with GTA V and DXVK. Note that we are exporting unused parameters from the vertex shader now, but this can't be optimized easily because we don't keep the fragment shader info... Cc: mesa-sta...@lists.freedesktop.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477 Signed-off-by: Samuel Pitoiset --- src/amd/vulkan/radv_nir_to_llvm.c | 30 +- src/amd/vulkan/radv_pipeline.c| 16 src/amd/vulkan/radv_shader.h | 1 + src/amd/vulkan/radv_shader_info.c | 4 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 4940e3230f..d7cd8cc069 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx, int idx = variable->data.location; unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); LLVMValueRef interp = NULL; + uint64_t mask; variable->data.driver_location = idx * 4; - ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location; + mask = ((1ull << attrib_count) - 1) << variable->data.location; if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { unsigned interp_type; @@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx, for (unsigned i = 0; i < attrib_count; ++i) ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp; + if (idx == VARYING_SLOT_CLIP_DIST0) { + /* Do not account for the number of components inside the array +* of clip/cull distances because this might wrongly set other +* bits like primitive ID or layer. +*/ + mask = 1ull << VARYING_SLOT_CLIP_DIST0; + } + + ctx->input_mask |= mask; } static void @@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx, if (LLVMIsUndef(interp_param)) ctx->shader_info->fs.flat_shaded_mask |= 1u << index; ++index; + } else if (i == VARYING_SLOT_CLIP_DIST0) { + int length = ctx->shader_info->info.ps.num_input_clips_culls; + + for (unsigned j = 0; j < length; j += 4) { + inputs = ctx->inputs + ac_llvm_reg_index_soa(i, j); + + interp_param = *inputs; + interp_fs_input(ctx, index, interp_param, + ctx->abi.prim_mask, inputs); + ++index; + } } else if (i == VARYING_SLOT_POS) { for(int i = 0; i < 3; ++i) inputs[i] = ctx->abi.frag_pos[i]; @@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, memcpy(_args[target - V_008DFC_SQ_EXP_POS], , sizeof(args)); + /* Export the clip/cull distances values to the next stage. */ + radv_export_param(ctx, param_count, [0], 0xf); + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++; + if (ctx->num_output_clips + ctx->num_output_culls > 4) { + radv_export_param(ctx, param_count, [4], 0xf); + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++; + } } LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1}; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index e63c481d1e..0303642d7e 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs, ps_offset++; } + if (ps->info.info.ps.num_input_clips_culls) { + unsigned vs_offset; + + vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; + if (vs_offset != AC_EXP_PARAM_UNDEFINED) { + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ++ps_offset; + } + + vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1]; + if (vs_offset != AC_EXP_PARAM_UNDEFINED) { + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ++ps_offset; + } + } + for (unsigned i = 0; i < 32 && (1u << i) <=