Re: [Mesa-dev] nir_opt_copy_prop_vars doing the wrong thing
https://gitlab.freedesktop.org/airlied/mesa/tree/nir-copy-props-cast-test Contains a unit test that triggers it for me. Dave. On Sat, 11 May 2019 at 09:54, Jason Ekstrand wrote: > > We have unit tests for that pass. Maybe you could write one which exercises > the issue? It'd help in debugging. > > On Thu, May 9, 2019 at 8:12 PM Dave Airlie wrote: >> >> I've got a bunch of cases where copy prop vars is getting things wrong >> around casts, it finds a store to an vec2 but ends up with the >> writemask staying at 0x3 but the item being store being a single >> 64-bit. >> >> Debug is attached below. >> >> Dave. >> >> nir_lower_memcpy_deref >> shader: MESA_SHADER_KERNEL >> local-size: 0, 0, 0 (variable) >> shared-size: 1 >> inputs: 16 >> outputs: 0 >> uniforms: 0 >> shared: 0 >> decl_var shader_in INTERP_MODE_NONE uint64_t @0 (0.x, 0, 0) >> decl_var shader_in INTERP_MODE_NONE uint64_t @1 (1.x, 8, 0) >> decl_function __wrapped_vload2_private (0 params) >> >> impl __wrapped_vload2_private { >> decl_var INTERP_MODE_NONE uint[3] @2 >> block block_0: >> /* preds: */ >> vec1 64 ssa_0 = deref_var &@0 (shader_in uint64_t) >> vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */ >> vec1 64 ssa_2 = deref_var &@1 (shader_in uint64_t) >> vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */ >> vec1 64 ssa_4 = load_const (0x 0 /* 0.00 */) >> vec1 64 ssa_5 = load_const (0x 0 /* 0.00 */) >> vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint) >> vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2) >> vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */ >> vec1 64 ssa_9 = deref_var &@2 (function_temp uint[3]) >> vec1 64 ssa_10 = deref_cast (uint[3] *)ssa_9 (function_temp uint[3]) >> vec1 64 ssa_11 = load_const (0x 0 /* 0.00 */) >> vec1 64 ssa_13 = load_const (0x 0 /* 0.00 */) >> vec1 64 ssa_14 = deref_array &(*ssa_9)[0] (function_temp uint) /* &@2[0] >> */ >> intrinsic store_deref (ssa_14, ssa_8) (1, 0) /* wrmask=x */ /* access=0 >> */ >> vec1 64 
ssa_15 = deref_cast (uint *)ssa_1 (global uint) >> vec1 64 ssa_16 = load_const (0x 1 /* 0.00 */) >> vec1 64 ssa_17 = deref_ptr_as_array &(*ssa_15)[1] (global uint) /* >> &(*(uint *)ssa_1)[1] */ >> vec1 32 ssa_18 = intrinsic load_deref (ssa_17) (0) /* access=0 */ >> vec1 64 ssa_19 = deref_var &@2 (function_temp uint[3]) >> vec1 64 ssa_20 = deref_cast (uint[3] *)ssa_19 (function_temp uint[3]) >> vec1 64 ssa_21 = load_const (0x 0 /* 0.00 */) >> vec1 64 ssa_23 = load_const (0x 1 /* 0.00 */) >> vec1 64 ssa_24 = deref_array &(*ssa_19)[1] (function_temp uint) /* >> &@2[1] */ >> intrinsic store_deref (ssa_24, ssa_18) (1, 0) /* wrmask=x */ /* access=0 >> */ >> vec1 64 ssa_25 = deref_cast (uint *)ssa_1 (global uint) >> vec1 64 ssa_26 = load_const (0x 2 /* 0.00 */) >> vec1 64 ssa_27 = deref_ptr_as_array &(*ssa_25)[2] (global uint) /* >> &(*(uint *)ssa_1)[2] */ >> vec1 32 ssa_28 = intrinsic load_deref (ssa_27) (0) /* access=0 */ >> vec1 64 ssa_29 = deref_var &@2 (function_temp uint[3]) >> vec1 64 ssa_30 = deref_cast (uint[3] *)ssa_29 (function_temp uint[3]) >> vec1 64 ssa_31 = load_const (0x 0 /* 0.00 */) >> vec1 64 ssa_33 = load_const (0x 2 /* 0.00 */) >> vec1 64 ssa_34 = deref_array &(*ssa_29)[2] (function_temp uint) /* >> &@2[2] */ >> intrinsic store_deref (ssa_34, ssa_28) (1, 0) /* wrmask=x */ /* access=0 >> */ >> vec1 64 ssa_35 = deref_cast (uvec2 *)ssa_14 (function_temp uvec2) >> vec2 32 ssa_37 = intrinsic load_deref (ssa_35) (0) /* access=0 */ >> intrinsic store_deref (ssa_7, ssa_37) (3, 0) /* wrmask=xy */ /* access=0 >> */ >> vec1 64 ssa_38 = deref_cast (uvec2 *)ssa_24 (function_temp uvec2) >> vec2 32 ssa_40 = intrinsic load_deref (ssa_38) (0) /* access=0 */ >> vec1 64 ssa_41 = deref_cast (uvec2 *)ssa_3 (global uvec2) >> vec1 64 ssa_42 = load_const (0x 1 /* 0.00 */) >> vec1 64 ssa_43 = deref_ptr_as_array &(*ssa_41)[1] (global uvec2) >> /* &(*(uvec2 *)ssa_3)[1] */ >> intrinsic store_deref (ssa_43, ssa_40) (3, 0) /* wrmask=xy */ /* >> access=0 */ >> /* succs: block_1 */ 
>> block block_1: >> } >> >> nir_opt_copy_prop_vars >> ## nir_copy_prop_vars_impl for __wrapped_vload2_private >> # block0 >> >> vec1 64 ssa_0 = deref_var &unnamed (shader_in uint64_t) >> vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */ >> uint64_t unnamed: ssa_1 >> >> vec1 64 ssa_2 = deref_var &unnamed (shader_in uint64_t) >> vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */ >> uint64_t unnamed: ssa_1 >> uint64_t unnamed: ssa_3 >> >> vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint) >> vec1 64 ssa_7 = deref_cast (uve
Re: [Mesa-dev] nir_opt_copy_prop_vars doing the wrong thing
We have unit tests for that pass. Maybe you could write one which exercises the issue? It'd help in debugging. On Thu, May 9, 2019 at 8:12 PM Dave Airlie wrote: > I've got a bunch of cases where copy prop vars is getting things wrong > around casts, it finds a store to an vec2 but ends up with the > writemask staying at 0x3 but the item being store being a single > 64-bit. > > Debug is attached below. > > Dave. > > nir_lower_memcpy_deref > shader: MESA_SHADER_KERNEL > local-size: 0, 0, 0 (variable) > shared-size: 1 > inputs: 16 > outputs: 0 > uniforms: 0 > shared: 0 > decl_var shader_in INTERP_MODE_NONE uint64_t @0 (0.x, 0, 0) > decl_var shader_in INTERP_MODE_NONE uint64_t @1 (1.x, 8, 0) > decl_function __wrapped_vload2_private (0 params) > > impl __wrapped_vload2_private { > decl_var INTERP_MODE_NONE uint[3] @2 > block block_0: > /* preds: */ > vec1 64 ssa_0 = deref_var &@0 (shader_in uint64_t) > vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */ > vec1 64 ssa_2 = deref_var &@1 (shader_in uint64_t) > vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */ > vec1 64 ssa_4 = load_const (0x 0 /* 0.00 */) > vec1 64 ssa_5 = load_const (0x 0 /* 0.00 */) > vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint) > vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2) > vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */ > vec1 64 ssa_9 = deref_var &@2 (function_temp uint[3]) > vec1 64 ssa_10 = deref_cast (uint[3] *)ssa_9 (function_temp uint[3]) > vec1 64 ssa_11 = load_const (0x 0 /* 0.00 */) > vec1 64 ssa_13 = load_const (0x 0 /* 0.00 */) > vec1 64 ssa_14 = deref_array &(*ssa_9)[0] (function_temp uint) /* > &@2[0] */ > intrinsic store_deref (ssa_14, ssa_8) (1, 0) /* wrmask=x */ /* > access=0 */ > vec1 64 ssa_15 = deref_cast (uint *)ssa_1 (global uint) > vec1 64 ssa_16 = load_const (0x 1 /* 0.00 */) > vec1 64 ssa_17 = deref_ptr_as_array &(*ssa_15)[1] (global uint) /* > &(*(uint *)ssa_1)[1] */ > vec1 32 ssa_18 = intrinsic load_deref 
(ssa_17) (0) /* access=0 */ > vec1 64 ssa_19 = deref_var &@2 (function_temp uint[3]) > vec1 64 ssa_20 = deref_cast (uint[3] *)ssa_19 (function_temp uint[3]) > vec1 64 ssa_21 = load_const (0x 0 /* 0.00 */) > vec1 64 ssa_23 = load_const (0x 1 /* 0.00 */) > vec1 64 ssa_24 = deref_array &(*ssa_19)[1] (function_temp uint) /* > &@2[1] */ > intrinsic store_deref (ssa_24, ssa_18) (1, 0) /* wrmask=x */ /* > access=0 */ > vec1 64 ssa_25 = deref_cast (uint *)ssa_1 (global uint) > vec1 64 ssa_26 = load_const (0x 2 /* 0.00 */) > vec1 64 ssa_27 = deref_ptr_as_array &(*ssa_25)[2] (global uint) /* > &(*(uint *)ssa_1)[2] */ > vec1 32 ssa_28 = intrinsic load_deref (ssa_27) (0) /* access=0 */ > vec1 64 ssa_29 = deref_var &@2 (function_temp uint[3]) > vec1 64 ssa_30 = deref_cast (uint[3] *)ssa_29 (function_temp uint[3]) > vec1 64 ssa_31 = load_const (0x 0 /* 0.00 */) > vec1 64 ssa_33 = load_const (0x 2 /* 0.00 */) > vec1 64 ssa_34 = deref_array &(*ssa_29)[2] (function_temp uint) /* > &@2[2] */ > intrinsic store_deref (ssa_34, ssa_28) (1, 0) /* wrmask=x */ /* > access=0 */ > vec1 64 ssa_35 = deref_cast (uvec2 *)ssa_14 (function_temp uvec2) > vec2 32 ssa_37 = intrinsic load_deref (ssa_35) (0) /* access=0 */ > intrinsic store_deref (ssa_7, ssa_37) (3, 0) /* wrmask=xy */ /* > access=0 */ > vec1 64 ssa_38 = deref_cast (uvec2 *)ssa_24 (function_temp uvec2) > vec2 32 ssa_40 = intrinsic load_deref (ssa_38) (0) /* access=0 */ > vec1 64 ssa_41 = deref_cast (uvec2 *)ssa_3 (global uvec2) > vec1 64 ssa_42 = load_const (0x 1 /* 0.00 */) > vec1 64 ssa_43 = deref_ptr_as_array &(*ssa_41)[1] (global uvec2) > /* &(*(uvec2 *)ssa_3)[1] */ > intrinsic store_deref (ssa_43, ssa_40) (3, 0) /* wrmask=xy */ /* > access=0 */ > /* succs: block_1 */ > block block_1: > } > > nir_opt_copy_prop_vars > ## nir_copy_prop_vars_impl for __wrapped_vload2_private > # block0 > > vec1 64 ssa_0 = deref_var &unnamed (shader_in uint64_t) > vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */ > uint64_t unnamed: ssa_1 
> > vec1 64 ssa_2 = deref_var &unnamed (shader_in uint64_t) > vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */ > uint64_t unnamed: ssa_1 > uint64_t unnamed: ssa_3 > > vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint) > vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2) > vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */ > uint64_t unnamed: ssa_1 > uint64_t unnamed: ssa_3 > uint (uint *)ssa_1: ssa_8 > > vec1 64 ssa_9 = deref_var &unnamed (function_temp uint[3]) > vec1 64 ssa_10 = deref_cast (uint[
[Mesa-dev] nir_opt_copy_prop_vars doing the wrong thing
I've got a bunch of cases where copy prop vars is getting things wrong around casts; it finds a store to a vec2 but ends up with the writemask staying at 0x3 while the item being stored is a single 64-bit value. Debug is attached below. Dave. nir_lower_memcpy_deref shader: MESA_SHADER_KERNEL local-size: 0, 0, 0 (variable) shared-size: 1 inputs: 16 outputs: 0 uniforms: 0 shared: 0 decl_var shader_in INTERP_MODE_NONE uint64_t @0 (0.x, 0, 0) decl_var shader_in INTERP_MODE_NONE uint64_t @1 (1.x, 8, 0) decl_function __wrapped_vload2_private (0 params) impl __wrapped_vload2_private { decl_var INTERP_MODE_NONE uint[3] @2 block block_0: /* preds: */ vec1 64 ssa_0 = deref_var &@0 (shader_in uint64_t) vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */ vec1 64 ssa_2 = deref_var &@1 (shader_in uint64_t) vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */ vec1 64 ssa_4 = load_const (0x 0 /* 0.00 */) vec1 64 ssa_5 = load_const (0x 0 /* 0.00 */) vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint) vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2) vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */ vec1 64 ssa_9 = deref_var &@2 (function_temp uint[3]) vec1 64 ssa_10 = deref_cast (uint[3] *)ssa_9 (function_temp uint[3]) vec1 64 ssa_11 = load_const (0x 0 /* 0.00 */) vec1 64 ssa_13 = load_const (0x 0 /* 0.00 */) vec1 64 ssa_14 = deref_array &(*ssa_9)[0] (function_temp uint) /* &@2[0] */ intrinsic store_deref (ssa_14, ssa_8) (1, 0) /* wrmask=x */ /* access=0 */ vec1 64 ssa_15 = deref_cast (uint *)ssa_1 (global uint) vec1 64 ssa_16 = load_const (0x 1 /* 0.00 */) vec1 64 ssa_17 = deref_ptr_as_array &(*ssa_15)[1] (global uint) /* &(*(uint *)ssa_1)[1] */ vec1 32 ssa_18 = intrinsic load_deref (ssa_17) (0) /* access=0 */ vec1 64 ssa_19 = deref_var &@2 (function_temp uint[3]) vec1 64 ssa_20 = deref_cast (uint[3] *)ssa_19 (function_temp uint[3]) vec1 64 ssa_21 = load_const (0x 0 /* 0.00 */) vec1 64 ssa_23 = load_const (0x 1 /* 0.00 */) vec1 64 ssa_24 = 
deref_array &(*ssa_19)[1] (function_temp uint) /* &@2[1] */ intrinsic store_deref (ssa_24, ssa_18) (1, 0) /* wrmask=x */ /* access=0 */ vec1 64 ssa_25 = deref_cast (uint *)ssa_1 (global uint) vec1 64 ssa_26 = load_const (0x 2 /* 0.00 */) vec1 64 ssa_27 = deref_ptr_as_array &(*ssa_25)[2] (global uint) /* &(*(uint *)ssa_1)[2] */ vec1 32 ssa_28 = intrinsic load_deref (ssa_27) (0) /* access=0 */ vec1 64 ssa_29 = deref_var &@2 (function_temp uint[3]) vec1 64 ssa_30 = deref_cast (uint[3] *)ssa_29 (function_temp uint[3]) vec1 64 ssa_31 = load_const (0x 0 /* 0.00 */) vec1 64 ssa_33 = load_const (0x 2 /* 0.00 */) vec1 64 ssa_34 = deref_array &(*ssa_29)[2] (function_temp uint) /* &@2[2] */ intrinsic store_deref (ssa_34, ssa_28) (1, 0) /* wrmask=x */ /* access=0 */ vec1 64 ssa_35 = deref_cast (uvec2 *)ssa_14 (function_temp uvec2) vec2 32 ssa_37 = intrinsic load_deref (ssa_35) (0) /* access=0 */ intrinsic store_deref (ssa_7, ssa_37) (3, 0) /* wrmask=xy */ /* access=0 */ vec1 64 ssa_38 = deref_cast (uvec2 *)ssa_24 (function_temp uvec2) vec2 32 ssa_40 = intrinsic load_deref (ssa_38) (0) /* access=0 */ vec1 64 ssa_41 = deref_cast (uvec2 *)ssa_3 (global uvec2) vec1 64 ssa_42 = load_const (0x 1 /* 0.00 */) vec1 64 ssa_43 = deref_ptr_as_array &(*ssa_41)[1] (global uvec2) /* &(*(uvec2 *)ssa_3)[1] */ intrinsic store_deref (ssa_43, ssa_40) (3, 0) /* wrmask=xy */ /* access=0 */ /* succs: block_1 */ block block_1: } nir_opt_copy_prop_vars ## nir_copy_prop_vars_impl for __wrapped_vload2_private # block0 vec1 64 ssa_0 = deref_var &unnamed (shader_in uint64_t) vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */ uint64_t unnamed: ssa_1 vec1 64 ssa_2 = deref_var &unnamed (shader_in uint64_t) vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */ uint64_t unnamed: ssa_1 uint64_t unnamed: ssa_3 vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint) vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2) vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */ 
uint64_t unnamed: ssa_1 uint64_t unnamed: ssa_3 uint (uint *)ssa_1: ssa_8 vec1 64 ssa_9 = deref_var &unnamed (function_temp uint[3]) vec1 64 ssa_10 = deref_cast (uint[3] *)ssa_9 (function_temp uint[3]) vec1 64 ssa_14 = deref_array &(*ssa_9)[0] (function_temp uint) /* &unnamed[0] */ intrinsic store_deref (ssa_14, ssa_8) (1, 0) /* wrmask=x */ /* access=0 */ uint64_t unnamed: ssa_1 uint64_t unnamed: ssa_3 uint (uint *)ssa_1: ssa_8 uint unnamed[0]: ssa_8 vec1 64 ssa_15 = deref_cast (uint *)ssa_1