For full effect, you also want to enable the nir_shrink_vec_array_vars and nir_split_array_vars passes.
On Wed, Oct 17, 2018 at 6:00 PM Timothy Arceri <tarc...@itsqueeze.com> wrote: > Totals from affected shaders: > SGPRS: 1112 -> 1112 (0.00 %) > VGPRS: 1492 -> 1196 (-19.84 %) > Spilled SGPRs: 0 -> 0 (0.00 %) > Spilled VGPRs: 0 -> 0 (0.00 %) > Private memory VGPRs: 0 -> 0 (0.00 %) > Scratch size: 0 -> 0 (0.00 %) dwords per thread > Code Size: 112172 -> 101316 (-9.68 %) bytes > LDS: 0 -> 0 (0.00 %) blocks > Max Waves: 93 -> 98 (5.38 %) > Wait states: 0 -> 0 (0.00 %) > > All affected shaders are from "Batman: Arkham City" over DXVK. > > The pass detects that the temporary array created by DXVK for > storing TCS inputs is a copy of the input arrays and allows > us to avoid copying all of the input data and then indirecting > on it with if-ladders, instead we just do indirect indexing. > --- > src/amd/vulkan/radv_pipeline.c | 6 +++--- > src/amd/vulkan/radv_shader.c | 22 ++++++++++++++++++---- > src/amd/vulkan/radv_shader.h | 3 ++- > 3 files changed, 23 insertions(+), 8 deletions(-) > > diff --git a/src/amd/vulkan/radv_pipeline.c > b/src/amd/vulkan/radv_pipeline.c > index e1d665d0ac7..8d15a048bbf 100644 > --- a/src/amd/vulkan/radv_pipeline.c > +++ b/src/amd/vulkan/radv_pipeline.c > @@ -1808,13 +1808,13 @@ radv_link_shaders(struct radv_pipeline *pipeline, > nir_shader **shaders) > > ac_lower_indirect_derefs(ordered_shaders[i], > > pipeline->device->physical_device->rad_info.chip_class); > } > - radv_optimize_nir(ordered_shaders[i], false); > + radv_optimize_nir(ordered_shaders[i], false, > false); > > if > (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) { > ac_lower_indirect_derefs(ordered_shaders[i > - 1], > > pipeline->device->physical_device->rad_info.chip_class); > } > - radv_optimize_nir(ordered_shaders[i - 1], false); > + radv_optimize_nir(ordered_shaders[i - 1], false, > false); > } > } > } > @@ -2073,7 +2073,7 @@ void radv_create_shaders(struct radv_pipeline > *pipeline, > > if (!(flags & > VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) { > 
nir_lower_io_to_scalar_early(nir[i], mask); > - radv_optimize_nir(nir[i], false); > + radv_optimize_nir(nir[i], false, false); > } > } > } > diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c > index 3b3422c8da6..52aa83d4a5a 100644 > --- a/src/amd/vulkan/radv_shader.c > +++ b/src/amd/vulkan/radv_shader.c > @@ -118,7 +118,8 @@ void radv_DestroyShaderModule( > } > > void > -radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) > +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, > + bool allow_copies) > { > bool progress; > > @@ -128,6 +129,15 @@ radv_optimize_nir(struct nir_shader *shader, bool > optimize_conservatively) > NIR_PASS_V(shader, nir_lower_vars_to_ssa); > NIR_PASS_V(shader, nir_lower_pack); > > + if (allow_copies) { > + /* Only run this pass in the first call to > + * radv_optimize_nir. Later calls assume that > we've > + * lowered away any copy_deref instructions and we > + * don't want to introduce any more. > + */ > + NIR_PASS(progress, shader, > nir_opt_find_array_copies); > + } > + > NIR_PASS(progress, shader, nir_opt_copy_prop_vars); > NIR_PASS(progress, shader, nir_opt_dead_write_vars); > > @@ -306,7 +316,6 @@ radv_shader_compile_to_nir(struct radv_device *device, > } > > nir_split_var_copies(nir); > - nir_lower_var_copies(nir); > > nir_lower_global_vars_to_local(nir); > nir_remove_dead_variables(nir, nir_var_local); > @@ -323,7 +332,12 @@ radv_shader_compile_to_nir(struct radv_device *device, > nir_lower_load_const_to_scalar(nir); > > if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) > - radv_optimize_nir(nir, false); > + radv_optimize_nir(nir, false, true); > + > + /* We call nir_lower_var_copies() after the first > radv_optimize_nir() > + * to remove any copies introduced by nir_opt_find_array_copies(). > + */ > + nir_lower_var_copies(nir); > > /* Indirect lowering must be called after the radv_optimize_nir() > loop > * has been called at least once. 
Otherwise indirect lowering can > @@ -331,7 +345,7 @@ radv_shader_compile_to_nir(struct radv_device *device, > * considered too large for unrolling. > */ > ac_lower_indirect_derefs(nir, > device->physical_device->rad_info.chip_class); > - radv_optimize_nir(nir, flags & > VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT); > + radv_optimize_nir(nir, flags & > VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false); > > return nir; > } > diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h > index c490b69f52b..22423e5f99a 100644 > --- a/src/amd/vulkan/radv_shader.h > +++ b/src/amd/vulkan/radv_shader.h > @@ -298,7 +298,8 @@ struct radv_shader_slab { > }; > > void > -radv_optimize_nir(struct nir_shader *shader, bool > optimize_conservatively); > +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, > + bool allow_copies); > > nir_shader * > radv_shader_compile_to_nir(struct radv_device *device, > -- > 2.17.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev