Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek
On Thu, May 2, 2019 at 10:12 AM Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > This change requires LLVM r356755. > > 32706 shaders in 16744 tests > Totals: > SGPRS: 1448848 -> 1455984 (0.49 %) > VGPRS: 1016684 -> 1016220 (-0.05 %) > Spilled SGPRs: 25871 -> 25815 (-0.22 %) > Spilled VGPRs: 122 -> 122 (0.00 %) > Scratch size: 11964 -> 11956 (-0.07 %) dwords per thread > Code Size: 55324500 -> 55301152 (-0.04 %) bytes > Max Waves: 235660 -> 235586 (-0.03 %) > > Totals from affected shaders: > SGPRS: 293704 -> 300840 (2.43 %) > VGPRS: 246716 -> 246252 (-0.19 %) > Spilled SGPRs: 159 -> 103 (-35.22 %) > Scratch size: 188 -> 180 (-4.26 %) dwords per thread > Code Size: 8653664 -> 8630316 (-0.27 %) bytes > Max Waves: 60811 -> 60737 (-0.12 %) > > v3: - rebase on top of master > - remove the restriction for SSBO stores as well > v2: - fix llvm 8 > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > > I plan to run benchmarks with that change. > > src/amd/common/ac_llvm_build.c | 15 ++++++++------- > src/amd/common/ac_llvm_build.h | 1 + > src/amd/common/ac_nir_to_llvm.c | 9 ++++++--- > src/amd/vulkan/radv_nir_to_llvm.c | 4 +++- > 4 files changed, 18 insertions(+), 11 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c > b/src/amd/common/ac_llvm_build.c > index 22b771db774..e191a64310f 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -84,6 +84,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, > ctx->v3i32 = LLVMVectorType(ctx->i32, 3); > ctx->v4i32 = LLVMVectorType(ctx->i32, 4); > ctx->v2f32 = LLVMVectorType(ctx->f32, 2); > + ctx->v3f32 = LLVMVectorType(ctx->f32, 3); > ctx->v4f32 = LLVMVectorType(ctx->f32, 4); > ctx->v8i32 = LLVMVectorType(ctx->i32, 8); > > @@ -1167,7 +1168,7 @@ ac_build_llvm8_buffer_store_common(struct > ac_llvm_context *ctx, > args[idx++] = voffset ? voffset : ctx->i32_0; > args[idx++] = soffset ? soffset : ctx->i32_0; > args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 
2 : > 0), 0); > - unsigned func = num_channels == 3 ? 4 : num_channels; > + unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : > num_channels; > const char *indexing_kind = structurized ? "struct" : "raw"; > char name[256], type_name[8]; > > @@ -1225,9 +1226,9 @@ ac_build_buffer_store_dword(struct ac_llvm_context > *ctx, > bool writeonly_memory, > bool swizzle_enable_hint) > { > - /* Split 3 channel stores, becase LLVM doesn't support 3-channel > + /* Split 3 channel stores, because only LLVM 9+ support 3-channel > * intrinsics. */ > - if (num_channels == 3) { > + if (num_channels == 3 && HAVE_LLVM < 0x900) { > LLVMValueRef v[3], v01; > > for (int i = 0; i < 3; i++) { > @@ -1354,7 +1355,7 @@ ac_build_llvm8_buffer_load_common(struct > ac_llvm_context *ctx, > args[idx++] = voffset ? voffset : ctx->i32_0; > args[idx++] = soffset ? soffset : ctx->i32_0; > args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : > 0), 0); > - unsigned func = num_channels == 3 ? 4 : num_channels; > + unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : > num_channels; > const char *indexing_kind = structurized ? "struct" : "raw"; > char name[256], type_name[8]; > > @@ -1420,7 +1421,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, > if (num_channels == 1) > return result[0]; > > - if (num_channels == 3) > + if (num_channels == 3 && HAVE_LLVM < 0x900) > result[num_channels++] = LLVMGetUndef(ctx->f32); > return ac_build_gather_values(ctx, result, num_channels); > } > @@ -1512,7 +1513,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context > *ctx, > args[idx++] = soffset ? soffset : ctx->i32_0; > args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); > args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : > 0), 0); > - unsigned func = num_channels == 3 ? 4 : num_channels; > + unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : > num_channels; > const char *indexing_kind = structurized ? 
"struct" : "raw"; > char name[256], type_name[8]; > > @@ -1698,7 +1699,7 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context > *ctx, > args[idx++] = soffset ? soffset : ctx->i32_0; > args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); > args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : > 0), 0); > - unsigned func = num_channels == 3 ? 4 : num_channels; > + unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : > num_channels; > const char *indexing_kind = structurized ? "struct" : "raw"; > char name[256], type_name[8]; > > diff --git a/src/amd/common/ac_llvm_build.h > b/src/amd/common/ac_llvm_build.h > index 98f856106d6..19db808a9a8 100644 > --- a/src/amd/common/ac_llvm_build.h > +++ b/src/amd/common/ac_llvm_build.h > @@ -71,6 +71,7 @@ struct ac_llvm_context { > LLVMTypeRef v3i32; > LLVMTypeRef v4i32; > LLVMTypeRef v2f32; > + LLVMTypeRef v3f32; > LLVMTypeRef v4f32; > LLVMTypeRef v8i32; > > diff --git a/src/amd/common/ac_nir_to_llvm.c > b/src/amd/common/ac_nir_to_llvm.c > index c92eaaca31d..d0bfeb3efa9 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -1575,9 +1575,9 @@ static void visit_store_ssbo(struct ac_nir_context > *ctx, > > u_bit_scan_consecutive_range(&writemask, &start, &count); > > - /* Due to an LLVM limitation, split 3-element writes > - * into a 2-element and a 1-element write. */ > - if (count == 3) { > + /* Due to an LLVM limitation with LLVM < 9, split 3-element > + * writes into a 2-element and a 1-element write. 
*/ > + if (count == 3 && (elem_size_bytes != 4 || HAVE_LLVM < > 0x900)) { > writemask |= 1 << (start + 2); > count = 2; > } > @@ -1619,6 +1619,9 @@ static void visit_store_ssbo(struct ac_nir_context > *ctx, > case 16: /* v4f32 */ > data_type = ctx->ac.v4f32; > break; > + case 12: /* v3f32 */ > + data_type = ctx->ac.v3f32; > + break; > case 8: /* v2f32 */ > data_type = ctx->ac.v2f32; > break; > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > b/src/amd/vulkan/radv_nir_to_llvm.c > index b4a19aa2e5d..c40ea004831 100644 > --- a/src/amd/vulkan/radv_nir_to_llvm.c > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > @@ -2748,7 +2748,9 @@ radv_emit_stream_output(struct radv_shader_context > *ctx, > /* fall through */ > case 4: /* as v4i32 */ > vdata = ac_build_gather_values(&ctx->ac, out, > - > util_next_power_of_two(num_comps)); > + HAVE_LLVM < 0x900 ? > + > util_next_power_of_two(num_comps) : > + num_comps); > break; > } > > -- > 2.21.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev