Re: [Mesa-dev] [PATCH v2] radv: fix passing clip/cull distances from VS to PS

2018-08-30 Thread Bas Nieuwenhuizen
okay, r-b with that.
On Thu, Aug 30, 2018 at 1:39 PM Samuel Pitoiset
 wrote:
>
>
>
> On 8/30/18 12:30 PM, Bas Nieuwenhuizen wrote:
> > On Wed, Aug 29, 2018 at 10:55 PM Samuel Pitoiset
> >  wrote:
> >>
> >> CTS doesn't test input clip/cull distances for the fragment
> >> shader stage, which explains why this was totally broken. I
> >> wrote a simple test locally that works now.
> >>
> >> This fixes a crash with GTA V and DXVK.
> >>
> >> Note that we are exporting unused parameters from the vertex
> >> shader now, but this can't be optimized easily because we don't
> >> keep the fragment shader info...
> >>
> >> Cc: mesa-sta...@lists.freedesktop.org
> >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477
> >> Signed-off-by: Samuel Pitoiset 
> >> ---
> >>   src/amd/vulkan/radv_nir_to_llvm.c | 30 +-
> >>   src/amd/vulkan/radv_pipeline.c| 16 
> >>   src/amd/vulkan/radv_shader.h  |  1 +
> >>   src/amd/vulkan/radv_shader_info.c |  4 
> >>   4 files changed, 50 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> >> b/src/amd/vulkan/radv_nir_to_llvm.c
> >> index 4940e3230f..d7cd8cc069 100644
> >> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> >> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> >> @@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context 
> >> *ctx,
> >>  int idx = variable->data.location;
> >>  unsigned attrib_count = 
> >> glsl_count_attribute_slots(variable->type, false);
> >>  LLVMValueRef interp = NULL;
> >> +   uint64_t mask;
> >>
> >>  variable->data.driver_location = idx * 4;
> >> -   ctx->input_mask |= ((1ull << attrib_count) - 1) << 
> >> variable->data.location;
> >> +   mask = ((1ull << attrib_count) - 1) << variable->data.location;
> >>
> >>  if (glsl_get_base_type(glsl_without_array(variable->type)) == 
> >> GLSL_TYPE_FLOAT) {
> >>  unsigned interp_type;
> >> @@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context 
> >> *ctx,
> >>  for (unsigned i = 0; i < attrib_count; ++i)
> >>  ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
> >>
> >> +   if (idx == VARYING_SLOT_CLIP_DIST0) {
> >> +   /* Do not account for the number of components inside the 
> >> array
> >> +* of clip/cull distances because this might wrongly set 
> >> other
> >> +* bits like primitive ID or layer.
> >> +*/
> >> +   mask = 1ull << VARYING_SLOT_CLIP_DIST0;
> >> +   }
> >> +
> >> +   ctx->input_mask |= mask;
> >>   }
> >>
> >>   static void
> >> @@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx,
> >>  if (LLVMIsUndef(interp_param))
> >>  ctx->shader_info->fs.flat_shaded_mask |= 
> >> 1u << index;
> >>  ++index;
> >> +   } else if (i == VARYING_SLOT_CLIP_DIST0) {
> >> +   int length = 
> >> ctx->shader_info->info.ps.num_input_clips_culls;
> >> +
> >> +   for (unsigned j = 0; j < length; j += 4) {
> >> +   inputs = ctx->inputs + 
> >> ac_llvm_reg_index_soa(i, j);
> >> +
> >> +   interp_param = *inputs;
> >> +   interp_fs_input(ctx, index, interp_param,
> >> +   ctx->abi.prim_mask, 
> >> inputs);
> >> +   ++index;
> >> +   }
> >>  } else if (i == VARYING_SLOT_POS) {
> >>  for(int i = 0; i < 3; ++i)
> >>  inputs[i] = ctx->abi.frag_pos[i];
> >> @@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context 
> >> *ctx,
> >>  memcpy(_args[target - V_008DFC_SQ_EXP_POS],
> >> , sizeof(args));
> >>
> >> +   /* Export the clip/cull distances values to the next 
> >> stage. */
> >> +   radv_export_param(ctx, param_count, [0], 0xf);
> >> +   outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = 
> >> param_count++;
> >> +   if (ctx->num_output_clips + ctx->num_output_culls > 4) {
> >> +   radv_export_param(ctx, param_count, [4], 
> >> 0xf);
> >> +   
> >> outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
> >> +   }
> >>  }
> >>
> >>  LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, 
> >> ctx->ac.f32_0, ctx->ac.f32_1};
> >> diff --git a/src/amd/vulkan/radv_pipeline.c 
> >> b/src/amd/vulkan/radv_pipeline.c
> >> index e63c481d1e..0303642d7e 100644
> >> --- a/src/amd/vulkan/radv_pipeline.c
> >> +++ b/src/amd/vulkan/radv_pipeline.c
> >> @@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct 
> >> radeon_cmdbuf *cs,
> >>

Re: [Mesa-dev] [PATCH v2] radv: fix passing clip/cull distances from VS to PS

2018-08-30 Thread Samuel Pitoiset



On 8/30/18 12:30 PM, Bas Nieuwenhuizen wrote:

On Wed, Aug 29, 2018 at 10:55 PM Samuel Pitoiset
 wrote:


CTS doesn't test input clip/cull distances for the fragment
shader stage, which explains why this was totally broken. I
wrote a simple test locally that works now.

This fixes a crash with GTA V and DXVK.

Note that we are exporting unused parameters from the vertex
shader now, but this can't be optimized easily because we don't
keep the fragment shader info...

Cc: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477
Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/radv_nir_to_llvm.c | 30 +-
  src/amd/vulkan/radv_pipeline.c| 16 
  src/amd/vulkan/radv_shader.h  |  1 +
  src/amd/vulkan/radv_shader_info.c |  4 
  4 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 4940e3230f..d7cd8cc069 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
 int idx = variable->data.location;
 unsigned attrib_count = glsl_count_attribute_slots(variable->type, 
false);
 LLVMValueRef interp = NULL;
+   uint64_t mask;

 variable->data.driver_location = idx * 4;
-   ctx->input_mask |= ((1ull << attrib_count) - 1) << 
variable->data.location;
+   mask = ((1ull << attrib_count) - 1) << variable->data.location;

 if (glsl_get_base_type(glsl_without_array(variable->type)) == 
GLSL_TYPE_FLOAT) {
 unsigned interp_type;
@@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
 for (unsigned i = 0; i < attrib_count; ++i)
 ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;

+   if (idx == VARYING_SLOT_CLIP_DIST0) {
+   /* Do not account for the number of components inside the array
+* of clip/cull distances because this might wrongly set other
+* bits like primitive ID or layer.
+*/
+   mask = 1ull << VARYING_SLOT_CLIP_DIST0;
+   }
+
+   ctx->input_mask |= mask;
  }

  static void
@@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx,
 if (LLVMIsUndef(interp_param))
 ctx->shader_info->fs.flat_shaded_mask |= 1u << 
index;
 ++index;
+   } else if (i == VARYING_SLOT_CLIP_DIST0) {
+   int length = 
ctx->shader_info->info.ps.num_input_clips_culls;
+
+   for (unsigned j = 0; j < length; j += 4) {
+   inputs = ctx->inputs + ac_llvm_reg_index_soa(i, 
j);
+
+   interp_param = *inputs;
+   interp_fs_input(ctx, index, interp_param,
+   ctx->abi.prim_mask, inputs);
+   ++index;
+   }
 } else if (i == VARYING_SLOT_POS) {
 for(int i = 0; i < 3; ++i)
 inputs[i] = ctx->abi.frag_pos[i];
@@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
 memcpy(_args[target - V_008DFC_SQ_EXP_POS],
, sizeof(args));

+   /* Export the clip/cull distances values to the next stage. */
+   radv_export_param(ctx, param_count, [0], 0xf);
+   outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = 
param_count++;
+   if (ctx->num_output_clips + ctx->num_output_culls > 4) {
+   radv_export_param(ctx, param_count, [4], 0xf);
+   
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
+   }
 }

 LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, 
ctx->ac.f32_0, ctx->ac.f32_1};
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index e63c481d1e..0303642d7e 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf 
*cs,
 ps_offset++;
 }

+   if (ps->info.info.ps.num_input_clips_culls) {
+   unsigned vs_offset;
+
+   vs_offset = 
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
+   if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+   ps_input_cntl[ps_offset] = 
offset_to_ps_input(vs_offset, true);
+   ++ps_offset;
+   }
+
+   vs_offset = 
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
+   if (vs_offset != AC_EXP_PARAM_UNDEFINED) {


  &&  ps->info.info.ps.num_input_clips_culls > 4 ? Otherwise ps_offset
can be 

Re: [Mesa-dev] [PATCH v2] radv: fix passing clip/cull distances from VS to PS

2018-08-30 Thread Bas Nieuwenhuizen
On Wed, Aug 29, 2018 at 10:55 PM Samuel Pitoiset
 wrote:
>
> CTS doesn't test input clip/cull distances for the fragment
> shader stage, which explains why this was totally broken. I
> wrote a simple test locally that works now.
>
> This fixes a crash with GTA V and DXVK.
>
> Note that we are exporting unused parameters from the vertex
> shader now, but this can't be optimized easily because we don't
> keep the fragment shader info...
>
> Cc: mesa-sta...@lists.freedesktop.org
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_nir_to_llvm.c | 30 +-
>  src/amd/vulkan/radv_pipeline.c| 16 
>  src/amd/vulkan/radv_shader.h  |  1 +
>  src/amd/vulkan/radv_shader_info.c |  4 
>  4 files changed, 50 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index 4940e3230f..d7cd8cc069 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
> int idx = variable->data.location;
> unsigned attrib_count = glsl_count_attribute_slots(variable->type, 
> false);
> LLVMValueRef interp = NULL;
> +   uint64_t mask;
>
> variable->data.driver_location = idx * 4;
> -   ctx->input_mask |= ((1ull << attrib_count) - 1) << 
> variable->data.location;
> +   mask = ((1ull << attrib_count) - 1) << variable->data.location;
>
> if (glsl_get_base_type(glsl_without_array(variable->type)) == 
> GLSL_TYPE_FLOAT) {
> unsigned interp_type;
> @@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
> for (unsigned i = 0; i < attrib_count; ++i)
> ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
>
> +   if (idx == VARYING_SLOT_CLIP_DIST0) {
> +   /* Do not account for the number of components inside the 
> array
> +* of clip/cull distances because this might wrongly set other
> +* bits like primitive ID or layer.
> +*/
> +   mask = 1ull << VARYING_SLOT_CLIP_DIST0;
> +   }
> +
> +   ctx->input_mask |= mask;
>  }
>
>  static void
> @@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx,
> if (LLVMIsUndef(interp_param))
> ctx->shader_info->fs.flat_shaded_mask |= 1u 
> << index;
> ++index;
> +   } else if (i == VARYING_SLOT_CLIP_DIST0) {
> +   int length = 
> ctx->shader_info->info.ps.num_input_clips_culls;
> +
> +   for (unsigned j = 0; j < length; j += 4) {
> +   inputs = ctx->inputs + 
> ac_llvm_reg_index_soa(i, j);
> +
> +   interp_param = *inputs;
> +   interp_fs_input(ctx, index, interp_param,
> +   ctx->abi.prim_mask, inputs);
> +   ++index;
> +   }
> } else if (i == VARYING_SLOT_POS) {
> for(int i = 0; i < 3; ++i)
> inputs[i] = ctx->abi.frag_pos[i];
> @@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
> memcpy(_args[target - V_008DFC_SQ_EXP_POS],
>, sizeof(args));
>
> +   /* Export the clip/cull distances values to the next stage. */
> +   radv_export_param(ctx, param_count, [0], 0xf);
> +   outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = 
> param_count++;
> +   if (ctx->num_output_clips + ctx->num_output_culls > 4) {
> +   radv_export_param(ctx, param_count, [4], 0xf);
> +   
> outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
> +   }
> }
>
> LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, 
> ctx->ac.f32_0, ctx->ac.f32_1};
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index e63c481d1e..0303642d7e 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf 
> *cs,
> ps_offset++;
> }
>
> +   if (ps->info.info.ps.num_input_clips_culls) {
> +   unsigned vs_offset;
> +
> +   vs_offset = 
> outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
> +   if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
> +   ps_input_cntl[ps_offset] = 
> offset_to_ps_input(vs_offset, true);
> +   ++ps_offset;
> +   }
> +
> +   vs_offset = 
> 

[Mesa-dev] [PATCH v2] radv: fix passing clip/cull distances from VS to PS

2018-08-29 Thread Samuel Pitoiset
CTS doesn't test input clip/cull distances for the fragment
shader stage, which explains why this was totally broken. I
wrote a simple test locally that works now.

This fixes a crash with GTA V and DXVK.

Note that we are exporting unused parameters from the vertex
shader now, but this can't be optimized easily because we don't
keep the fragment shader info...

Cc: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_nir_to_llvm.c | 30 +-
 src/amd/vulkan/radv_pipeline.c| 16 
 src/amd/vulkan/radv_shader.h  |  1 +
 src/amd/vulkan/radv_shader_info.c |  4 
 4 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 4940e3230f..d7cd8cc069 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
int idx = variable->data.location;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, 
false);
LLVMValueRef interp = NULL;
+   uint64_t mask;
 
variable->data.driver_location = idx * 4;
-   ctx->input_mask |= ((1ull << attrib_count) - 1) << 
variable->data.location;
+   mask = ((1ull << attrib_count) - 1) << variable->data.location;
 
if (glsl_get_base_type(glsl_without_array(variable->type)) == 
GLSL_TYPE_FLOAT) {
unsigned interp_type;
@@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
for (unsigned i = 0; i < attrib_count; ++i)
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
 
+   if (idx == VARYING_SLOT_CLIP_DIST0) {
+   /* Do not account for the number of components inside the array
+* of clip/cull distances because this might wrongly set other
+* bits like primitive ID or layer.
+*/
+   mask = 1ull << VARYING_SLOT_CLIP_DIST0;
+   }
+
+   ctx->input_mask |= mask;
 }
 
 static void
@@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx,
if (LLVMIsUndef(interp_param))
ctx->shader_info->fs.flat_shaded_mask |= 1u << 
index;
++index;
+   } else if (i == VARYING_SLOT_CLIP_DIST0) {
+   int length = 
ctx->shader_info->info.ps.num_input_clips_culls;
+
+   for (unsigned j = 0; j < length; j += 4) {
+   inputs = ctx->inputs + ac_llvm_reg_index_soa(i, 
j);
+
+   interp_param = *inputs;
+   interp_fs_input(ctx, index, interp_param,
+   ctx->abi.prim_mask, inputs);
+   ++index;
+   }
} else if (i == VARYING_SLOT_POS) {
for(int i = 0; i < 3; ++i)
inputs[i] = ctx->abi.frag_pos[i];
@@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
memcpy(_args[target - V_008DFC_SQ_EXP_POS],
   , sizeof(args));
 
+   /* Export the clip/cull distances values to the next stage. */
+   radv_export_param(ctx, param_count, [0], 0xf);
+   outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = 
param_count++;
+   if (ctx->num_output_clips + ctx->num_output_culls > 4) {
+   radv_export_param(ctx, param_count, [4], 0xf);
+   
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
+   }
}
 
LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, 
ctx->ac.f32_0, ctx->ac.f32_1};
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index e63c481d1e..0303642d7e 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf 
*cs,
ps_offset++;
}
 
+   if (ps->info.info.ps.num_input_clips_culls) {
+   unsigned vs_offset;
+
+   vs_offset = 
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
+   if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+   ps_input_cntl[ps_offset] = 
offset_to_ps_input(vs_offset, true);
+   ++ps_offset;
+   }
+
+   vs_offset = 
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
+   if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+   ps_input_cntl[ps_offset] = 
offset_to_ps_input(vs_offset, true);
+   ++ps_offset;
+   }
+   }
+
for (unsigned i = 0; i < 32 && (1u << i) <=