Re: [Mesa-dev] [PATCH] radv: implement multisample image copies
Hi Dave, I'd appreciate if you could split all the ac stuff into a separate patch. The big issue I see with the patch is that we don't support multisample image stores yet. In particular, you have to copy over the fmask value (and find a way to skip over the fmask lookup in the tex instruction) or clear the fmask just for the dst rect. - Bas On Fri, Mar 9, 2018 at 7:30 AM, Dave Airliewrote: > From: Dave Airlie > > It appears its quite legal to do image copies on multisample > images, however due to a bug in our txf handling and incomplete > tests we never actually noticed we didn't do it properly in radv. > > This patch implements a compute shader to copy multiple samples > of an image to another image. It implements the nir txf_ms_mcs > opcode for getting the fmask value, and then uses that value > to either copy sample 0 to all samples, or iterate across > the valid samples copying them. > > The shader is inspired by one RE'd from AMDVLK. > > Fixes: > dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image* > > Signed-off-by: Dave Airlie > --- > src/amd/common/ac_nir_to_llvm.c | 54 ++--- > src/amd/vulkan/radv_meta_bufimage.c | 229 > ++-- > src/amd/vulkan/radv_meta_copy.c | 2 +- > src/amd/vulkan/radv_private.h | 2 +- > 4 files changed, 258 insertions(+), 29 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 9b850698608..071f54a5b61 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -2277,6 +2277,7 @@ static LLVMValueRef build_tex_intrinsic(struct > ac_nir_context *ctx, > switch (instr->op) { > case nir_texop_txf: > case nir_texop_txf_ms: > + case nir_texop_txf_ms_mcs: > case nir_texop_samples_identical: > args->opcode = lod_is_zero || >instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? > @@ -3417,6 +3418,24 @@ glsl_is_array_image(const struct glsl_type *type) > } > > > +static LLVMValueRef get_fmask_desc_valid(struct ac_llvm_context *ctx, > + LLVMValueRef fmask_desc_ptr) > +{ > + LLVMValueRef fmask_desc = > + LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, > +ctx->v8i32, ""); > + > + LLVMValueRef fmask_word1 = > + LLVMBuildExtractElement(ctx->builder, fmask_desc, > + ctx->i32_1, ""); > + > + LLVMValueRef word1_is_nonzero = > + LLVMBuildICmp(ctx->builder, LLVMIntNE, > + fmask_word1, ctx->i32_0, ""); > + > + return word1_is_nonzero; > +} > + > /* Adjust the sample index according to FMASK. > * > * For uncompressed MSAA surfaces, FMASK should return 0x76543210, > @@ -3475,17 +3494,7 @@ static LLVMValueRef > adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, > /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK > * resource descriptor is 0 (invalid), > */ > - LLVMValueRef fmask_desc = > - LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, > -ctx->v8i32, ""); > - > - LLVMValueRef fmask_word1 = > - LLVMBuildExtractElement(ctx->builder, fmask_desc, > - ctx->i32_1, ""); > - > - LLVMValueRef word1_is_nonzero = > - LLVMBuildICmp(ctx->builder, LLVMIntNE, > - fmask_word1, ctx->i32_0, ""); > + LLVMValueRef word1_is_nonzero = get_fmask_desc_valid(ctx, > fmask_desc_ptr); > > /* Replace the MSAA sample index. */ > sample_index = > @@ -3518,7 +3527,7 @@ static LLVMValueRef get_image_coords(struct > ac_nir_context *ctx, > bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == > GLSL_SAMPLER_DIM_1D; > count = image_type_to_components_count(dim, is_array); > > - if (is_ms) { > + if (is_ms && instr->intrinsic == nir_intrinsic_image_load) { > LLVMValueRef fmask_load_address[3]; > int chan; > > @@ -4899,7 +4908,7 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, > if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) > *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, > *samp_ptr); > } > - if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || > + if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || > instr->op == nir_texop_txf_ms_mcs || > instr->op == > nir_texop_samples_identical)) > *fmask_ptr = get_sampler_desc(ctx, instr->texture, > AC_DESC_FMASK, instr, false, false); > } > @@ -5150,7 +5159,7 @@ static void visit_tex(struct ac_nir_context *ctx, > nir_tex_instr *instr) >
[Mesa-dev] [PATCH] radv: implement multisample image copies
From: Dave AirlieIt appears its quite legal to do image copies on multisample images, however due to a bug in our txf handling and incomplete tests we never actually noticed we didn't do it properly in radv. This patch implements a compute shader to copy multiple samples of an image to another image. It implements the nir txf_ms_mcs opcode for getting the fmask value, and then uses that value to either copy sample 0 to all samples, or iterate across the valid samples copying them. The shader is inspired by one RE'd from AMDVLK. Fixes: dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image* Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 54 ++--- src/amd/vulkan/radv_meta_bufimage.c | 229 ++-- src/amd/vulkan/radv_meta_copy.c | 2 +- src/amd/vulkan/radv_private.h | 2 +- 4 files changed, 258 insertions(+), 29 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 9b850698608..071f54a5b61 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2277,6 +2277,7 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, switch (instr->op) { case nir_texop_txf: case nir_texop_txf_ms: + case nir_texop_txf_ms_mcs: case nir_texop_samples_identical: args->opcode = lod_is_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? @@ -3417,6 +3418,24 @@ glsl_is_array_image(const struct glsl_type *type) } +static LLVMValueRef get_fmask_desc_valid(struct ac_llvm_context *ctx, + LLVMValueRef fmask_desc_ptr) +{ + LLVMValueRef fmask_desc = + LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, +ctx->v8i32, ""); + + LLVMValueRef fmask_word1 = + LLVMBuildExtractElement(ctx->builder, fmask_desc, + ctx->i32_1, ""); + + LLVMValueRef word1_is_nonzero = + LLVMBuildICmp(ctx->builder, LLVMIntNE, + fmask_word1, ctx->i32_0, ""); + + return word1_is_nonzero; +} + /* Adjust the sample index according to FMASK. * * For uncompressed MSAA surfaces, FMASK should return 0x76543210, @@ -3475,17 +3494,7 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK * resource descriptor is 0 (invalid), */ - LLVMValueRef fmask_desc = - LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, -ctx->v8i32, ""); - - LLVMValueRef fmask_word1 = - LLVMBuildExtractElement(ctx->builder, fmask_desc, - ctx->i32_1, ""); - - LLVMValueRef word1_is_nonzero = - LLVMBuildICmp(ctx->builder, LLVMIntNE, - fmask_word1, ctx->i32_0, ""); + LLVMValueRef word1_is_nonzero = get_fmask_desc_valid(ctx, fmask_desc_ptr); /* Replace the MSAA sample index. */ sample_index = @@ -3518,7 +3527,7 @@ static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D; count = image_type_to_components_count(dim, is_array); - if (is_ms) { + if (is_ms && instr->intrinsic == nir_intrinsic_image_load) { LLVMValueRef fmask_load_address[3]; int chan; @@ -4899,7 +4908,7 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); } - if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || + if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_txf_ms_mcs || instr->op == nir_texop_samples_identical)) *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, instr, false, false); } @@ -5150,7 +5159,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) /* Pack LOD */ if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && !lod_is_zero)) { address[count++] = lod; - } else if (instr->op == nir_texop_txf_ms && sample_index) { + } else if ((instr->op == nir_texop_txf_ms || instr->op == nir_texop_txf) && sample_index) { address[count++] = sample_index; } else if(instr->op == nir_texop_txs) { count = 0; @@ -5165,7 +5174,8 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) address[chan], ctx->ac.i32, "");