On Fri, May 3, 2019 at 11:42 AM Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > Because the new raw/struct intrinsics are buggy with LLVM 8 > (they weren't marked as source of divergence), we fallback to the > old instrinsics for atomic buffer operations. This means we need > to apply the indexing workaround for GFX9.
Can you make it more clear that we only delayed atomics to LLVM 9 and not load/store. I was confused on why we needed another variable. Otherwise r-b > > The fact that we need another workaround is painful but we should > be able to clean up that a bit once LLVM 7 support will be dropped. > > This fixes a GPU hang with AC Odyssey and some rendering problems > with Nioh. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110573 > Fixes: 31164cf5f70 ("ac/nir: only use the new raw/struct image atomic > intrinsics with LLVM 9+") > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/amd/common/ac_nir_to_llvm.c | 12 +++++++----- > src/amd/common/ac_shader_abi.h | 1 + > src/amd/vulkan/radv_nir_to_llvm.c | 6 ++++++ > 3 files changed, 14 insertions(+), 5 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index c92eaaca31d..151e0d0f961 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -2417,10 +2417,12 @@ static void get_image_coords(struct ac_nir_context > *ctx, > } > > static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx, > - const nir_intrinsic_instr > *instr, bool write) > + const nir_intrinsic_instr > *instr, > + bool write, bool atomic) > { > LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, > write); > - if (ctx->abi->gfx9_stride_size_workaround) { > + if (ctx->abi->gfx9_stride_size_workaround || > + (ctx->abi->gfx9_stride_size_workaround_for_atomic && atomic)) { > LLVMValueRef elem_count = > LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, > 0), ""); > LLVMValueRef stride = > LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, > 0), ""); > stride = LLVMBuildLShr(ctx->ac.builder, stride, > LLVMConstInt(ctx->ac.i32, 16, 0), ""); > @@ -2466,7 +2468,7 @@ static LLVMValueRef visit_image_load(struct > ac_nir_context *ctx, > unsigned num_channels = util_last_bit(mask); > LLVMValueRef rsrc, vindex; > > - rsrc = get_image_buffer_descriptor(ctx, instr, false); > + rsrc = get_image_buffer_descriptor(ctx, instr, false, false); > vindex = LLVMBuildExtractElement(ctx->ac.builder, > get_src(ctx, instr->src[1]), > ctx->ac.i32_0, ""); > > @@ -2520,7 +2522,7 @@ static void visit_image_store(struct ac_nir_context > *ctx, > args.cache_policy = get_cache_policy(ctx, access, true, > writeonly_memory); > > if (dim == GLSL_SAMPLER_DIM_BUF) { > - LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, > true); > + LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, > true, false); > LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, > instr->src[3])); > unsigned src_channels = ac_get_llvm_num_components(src); > LLVMValueRef vindex; > @@ -2632,7 +2634,7 @@ static LLVMValueRef visit_image_atomic(struct > ac_nir_context *ctx, > params[param_count++] = get_src(ctx, instr->src[3]); > > if (dim == GLSL_SAMPLER_DIM_BUF) { > - params[param_count++] = get_image_buffer_descriptor(ctx, > instr, true); > + params[param_count++] = get_image_buffer_descriptor(ctx, > instr, true, true); > params[param_count++] = > LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), > > ctx->ac.i32_0, ""); /* vindex */ > params[param_count++] = ctx->ac.i32_0; /* voffset */ > diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h > index 108fe58ce57..8debb1ff986 100644 > --- a/src/amd/common/ac_shader_abi.h > +++ b/src/amd/common/ac_shader_abi.h > @@ -203,6 +203,7 @@ struct ac_shader_abi { > /* Whether to workaround GFX9 ignoring the stride for the buffer size > if IDXEN=0 > * and LLVM optimizes an indexed load with constant index to IDXEN=0. > */ > bool gfx9_stride_size_workaround; > + bool gfx9_stride_size_workaround_for_atomic; > }; > > #endif /* AC_SHADER_ABI_H */ > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > b/src/amd/vulkan/radv_nir_to_llvm.c > index 796d78e34f4..d83f0bd547f 100644 > --- a/src/amd/vulkan/radv_nir_to_llvm.c > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > @@ -3687,6 +3687,12 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct > ac_llvm_compiler *ac_llvm, > ctx.abi.clamp_shadow_reference = false; > ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && > HAVE_LLVM < 0x800; > > + /* Because the new raw/struct atomic intrinsics are buggy with LLVM 8, > + * we fallback to the old intrinsics for atomic buffer image > operations > + * and thus we need to apply the indexing workaround... > + */ > + ctx.abi.gfx9_stride_size_workaround_for_atomic = ctx.ac.chip_class == > GFX9 && HAVE_LLVM < 0x900; > + > if (shader_count >= 2) > ac_init_exec_full_mask(&ctx.ac); > > -- > 2.21.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev