Re: [Mesa-dev] [PATCH] radv: implement multisample image copies

2018-03-09 Thread Bas Nieuwenhuizen
Hi Dave,

I'd appreciate if you could split all the ac stuff into a separate patch.

The big issue I see with the patch is that we don't support
multisample image stores yet. In particular, you have to copy over the
fmask value (and find a way to skip over the fmask lookup in the tex
instruction) or clear the fmask just for the dst rect.

- Bas

On Fri, Mar 9, 2018 at 7:30 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> It appears its quite legal to do image copies on multisample
> images, however due to a bug in our txf handling and incomplete
> tests we never actually noticed we didn't do it properly in radv.
>
> This patch implements a compute shader to copy multiple samples
> of an image to another image. It implements the nir txf_ms_mcs
> opcode for getting the fmask value, and then uses that value
> to either copy sample 0 to all samples, or iterate across
> the valid samples copying them.
>
> The shader is inspired by one RE'd from AMDVLK.
>
> Fixes:
> dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image*
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_nir_to_llvm.c |  54 ++---
>  src/amd/vulkan/radv_meta_bufimage.c | 229 
> ++--
>  src/amd/vulkan/radv_meta_copy.c |   2 +-
>  src/amd/vulkan/radv_private.h   |   2 +-
>  4 files changed, 258 insertions(+), 29 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 9b850698608..071f54a5b61 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2277,6 +2277,7 @@ static LLVMValueRef build_tex_intrinsic(struct 
> ac_nir_context *ctx,
> switch (instr->op) {
> case nir_texop_txf:
> case nir_texop_txf_ms:
> +   case nir_texop_txf_ms_mcs:
> case nir_texop_samples_identical:
> args->opcode = lod_is_zero ||
>instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
> @@ -3417,6 +3418,24 @@ glsl_is_array_image(const struct glsl_type *type)
>  }
>
>
> +static LLVMValueRef get_fmask_desc_valid(struct ac_llvm_context *ctx,
> +   LLVMValueRef fmask_desc_ptr)
> +{
> +   LLVMValueRef fmask_desc =
> +   LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
> +ctx->v8i32, "");
> +
> +   LLVMValueRef fmask_word1 =
> +   LLVMBuildExtractElement(ctx->builder, fmask_desc,
> +   ctx->i32_1, "");
> +
> +   LLVMValueRef word1_is_nonzero =
> +   LLVMBuildICmp(ctx->builder, LLVMIntNE,
> + fmask_word1, ctx->i32_0, "");
> +
> +   return word1_is_nonzero;
> +}
> +
>  /* Adjust the sample index according to FMASK.
>   *
>   * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
> @@ -3475,17 +3494,7 @@ static LLVMValueRef 
> adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
> /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
>  * resource descriptor is 0 (invalid),
>  */
> -   LLVMValueRef fmask_desc =
> -   LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
> -ctx->v8i32, "");
> -
> -   LLVMValueRef fmask_word1 =
> -   LLVMBuildExtractElement(ctx->builder, fmask_desc,
> -   ctx->i32_1, "");
> -
> -   LLVMValueRef word1_is_nonzero =
> -   LLVMBuildICmp(ctx->builder, LLVMIntNE,
> - fmask_word1, ctx->i32_0, "");
> +   LLVMValueRef word1_is_nonzero = get_fmask_desc_valid(ctx, 
> fmask_desc_ptr);
>
> /* Replace the MSAA sample index. */
> sample_index =
> @@ -3518,7 +3527,7 @@ static LLVMValueRef get_image_coords(struct 
> ac_nir_context *ctx,
> bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == 
> GLSL_SAMPLER_DIM_1D;
> count = image_type_to_components_count(dim, is_array);
>
> -   if (is_ms) {
> +   if (is_ms && instr->intrinsic == nir_intrinsic_image_load) {
> LLVMValueRef fmask_load_address[3];
> int chan;
>
> @@ -4899,7 +4908,7 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx,
> if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
> *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, 
> *samp_ptr);
> }
> -   if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
> +   if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || 
> instr->op == nir_texop_txf_ms_mcs ||
>  instr->op == 
> nir_texop_samples_identical))
> *fmask_ptr = get_sampler_desc(ctx, instr->texture, 
> AC_DESC_FMASK, instr, false, false);
>  }
> @@ -5150,7 +5159,7 @@ static void visit_tex(struct ac_nir_context *ctx, 
> nir_tex_instr *instr)
> 

[Mesa-dev] [PATCH] radv: implement multisample image copies

2018-03-08 Thread Dave Airlie
From: Dave Airlie 

It appears its quite legal to do image copies on multisample
images, however due to a bug in our txf handling and incomplete
tests we never actually noticed we didn't do it properly in radv.

This patch implements a compute shader to copy multiple samples
of an image to another image. It implements the nir txf_ms_mcs
opcode for getting the fmask value, and then uses that value
to either copy sample 0 to all samples, or iterate across
the valid samples copying them.

The shader is inspired by one RE'd from AMDVLK.

Fixes:
dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image*

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c |  54 ++---
 src/amd/vulkan/radv_meta_bufimage.c | 229 ++--
 src/amd/vulkan/radv_meta_copy.c |   2 +-
 src/amd/vulkan/radv_private.h   |   2 +-
 4 files changed, 258 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9b850698608..071f54a5b61 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2277,6 +2277,7 @@ static LLVMValueRef build_tex_intrinsic(struct 
ac_nir_context *ctx,
switch (instr->op) {
case nir_texop_txf:
case nir_texop_txf_ms:
+   case nir_texop_txf_ms_mcs:
case nir_texop_samples_identical:
args->opcode = lod_is_zero ||
   instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
@@ -3417,6 +3418,24 @@ glsl_is_array_image(const struct glsl_type *type)
 }
 
 
+static LLVMValueRef get_fmask_desc_valid(struct ac_llvm_context *ctx,
+   LLVMValueRef fmask_desc_ptr)
+{
+   LLVMValueRef fmask_desc =
+   LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
+ctx->v8i32, "");
+
+   LLVMValueRef fmask_word1 =
+   LLVMBuildExtractElement(ctx->builder, fmask_desc,
+   ctx->i32_1, "");
+
+   LLVMValueRef word1_is_nonzero =
+   LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ fmask_word1, ctx->i32_0, "");
+
+   return word1_is_nonzero;
+}
+
 /* Adjust the sample index according to FMASK.
  *
  * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
@@ -3475,17 +3494,7 @@ static LLVMValueRef 
adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
 * resource descriptor is 0 (invalid),
 */
-   LLVMValueRef fmask_desc =
-   LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
-ctx->v8i32, "");
-
-   LLVMValueRef fmask_word1 =
-   LLVMBuildExtractElement(ctx->builder, fmask_desc,
-   ctx->i32_1, "");
-
-   LLVMValueRef word1_is_nonzero =
-   LLVMBuildICmp(ctx->builder, LLVMIntNE,
- fmask_word1, ctx->i32_0, "");
+   LLVMValueRef word1_is_nonzero = get_fmask_desc_valid(ctx, 
fmask_desc_ptr);
 
/* Replace the MSAA sample index. */
sample_index =
@@ -3518,7 +3527,7 @@ static LLVMValueRef get_image_coords(struct 
ac_nir_context *ctx,
bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
count = image_type_to_components_count(dim, is_array);
 
-   if (is_ms) {
+   if (is_ms && instr->intrinsic == nir_intrinsic_image_load) {
LLVMValueRef fmask_load_address[3];
int chan;
 
@@ -4899,7 +4908,7 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx,
if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, 
*samp_ptr);
}
-   if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
+   if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || 
instr->op == nir_texop_txf_ms_mcs ||
 instr->op == 
nir_texop_samples_identical))
*fmask_ptr = get_sampler_desc(ctx, instr->texture, 
AC_DESC_FMASK, instr, false, false);
 }
@@ -5150,7 +5159,7 @@ static void visit_tex(struct ac_nir_context *ctx, 
nir_tex_instr *instr)
/* Pack LOD */
if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) 
&& !lod_is_zero)) {
address[count++] = lod;
-   } else if (instr->op == nir_texop_txf_ms && sample_index) {
+   } else if ((instr->op == nir_texop_txf_ms || instr->op == 
nir_texop_txf) && sample_index) {
address[count++] = sample_index;
} else if(instr->op == nir_texop_txs) {
count = 0;
@@ -5165,7 +5174,8 @@ static void visit_tex(struct ac_nir_context *ctx, 
nir_tex_instr *instr)
 address[chan], ctx->ac.i32, 
"");