Re: [Mesa-dev] nir_opt_copy_prop_vars doing the wrong thing

2019-05-21 Thread Dave Airlie
https://gitlab.freedesktop.org/airlied/mesa/tree/nir-copy-props-cast-test

Contains a unit test that triggers it for me.

Dave.

On Sat, 11 May 2019 at 09:54, Jason Ekstrand  wrote:
>
> We have unit tests for that pass.  Maybe you could write one which exercises 
> the issue?  It'd help in debugging.
>
> On Thu, May 9, 2019 at 8:12 PM Dave Airlie  wrote:
>>
>> I've got a bunch of cases where copy prop vars is getting things wrong
>> around casts: it finds a store to a vec2 but ends up with the
>> writemask staying at 0x3 while the item being stored is a single
>> 64-bit value.
>>
>> Debug is attached below.
>>
>> Dave.
>>
>> nir_lower_memcpy_deref
>> shader: MESA_SHADER_KERNEL
>> local-size: 0, 0, 0 (variable)
>> shared-size: 1
>> inputs: 16
>> outputs: 0
>> uniforms: 0
>> shared: 0
>> decl_var shader_in INTERP_MODE_NONE uint64_t @0 (0.x, 0, 0)
>> decl_var shader_in INTERP_MODE_NONE uint64_t @1 (1.x, 8, 0)
>> decl_function __wrapped_vload2_private (0 params)
>>
>> impl __wrapped_vload2_private {
>> decl_var  INTERP_MODE_NONE uint[3] @2
>> block block_0:
>> /* preds: */
>> vec1 64 ssa_0 = deref_var &@0 (shader_in uint64_t)
>> vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */
>> vec1 64 ssa_2 = deref_var &@1 (shader_in uint64_t)
>> vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */
>> vec1 64 ssa_4 = load_const (0x   0 /* 0.00 */)
>> vec1 64 ssa_5 = load_const (0x   0 /* 0.00 */)
>> vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint)
>> vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2)
>> vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */
>> vec1 64 ssa_9 = deref_var &@2 (function_temp uint[3])
>> vec1 64 ssa_10 = deref_cast (uint[3] *)ssa_9 (function_temp uint[3])
>> vec1 64 ssa_11 = load_const (0x   0 /* 0.00 */)
>> vec1 64 ssa_13 = load_const (0x   0 /* 0.00 */)
>> vec1 64 ssa_14 = deref_array &(*ssa_9)[0] (function_temp uint) /* &@2[0] 
>> */
>> intrinsic store_deref (ssa_14, ssa_8) (1, 0) /* wrmask=x */ /* access=0 
>> */
>> vec1 64 ssa_15 = deref_cast (uint *)ssa_1 (global uint)
>> vec1 64 ssa_16 = load_const (0x   1 /* 0.00 */)
>> vec1 64 ssa_17 = deref_ptr_as_array &(*ssa_15)[1] (global uint) /*
>> &(*(uint *)ssa_1)[1] */
>> vec1 32 ssa_18 = intrinsic load_deref (ssa_17) (0) /* access=0 */
>> vec1 64 ssa_19 = deref_var &@2 (function_temp uint[3])
>> vec1 64 ssa_20 = deref_cast (uint[3] *)ssa_19 (function_temp uint[3])
>> vec1 64 ssa_21 = load_const (0x   0 /* 0.00 */)
>> vec1 64 ssa_23 = load_const (0x   1 /* 0.00 */)
>> vec1 64 ssa_24 = deref_array &(*ssa_19)[1] (function_temp uint) /* 
>> &@2[1] */
>> intrinsic store_deref (ssa_24, ssa_18) (1, 0) /* wrmask=x */ /* access=0 
>> */
>> vec1 64 ssa_25 = deref_cast (uint *)ssa_1 (global uint)
>> vec1 64 ssa_26 = load_const (0x   2 /* 0.00 */)
>> vec1 64 ssa_27 = deref_ptr_as_array &(*ssa_25)[2] (global uint) /*
>> &(*(uint *)ssa_1)[2] */
>> vec1 32 ssa_28 = intrinsic load_deref (ssa_27) (0) /* access=0 */
>> vec1 64 ssa_29 = deref_var &@2 (function_temp uint[3])
>> vec1 64 ssa_30 = deref_cast (uint[3] *)ssa_29 (function_temp uint[3])
>> vec1 64 ssa_31 = load_const (0x   0 /* 0.00 */)
>> vec1 64 ssa_33 = load_const (0x   2 /* 0.00 */)
>> vec1 64 ssa_34 = deref_array &(*ssa_29)[2] (function_temp uint) /* 
>> &@2[2] */
>> intrinsic store_deref (ssa_34, ssa_28) (1, 0) /* wrmask=x */ /* access=0 
>> */
>> vec1 64 ssa_35 = deref_cast (uvec2 *)ssa_14 (function_temp uvec2)
>> vec2 32 ssa_37 = intrinsic load_deref (ssa_35) (0) /* access=0 */
>> intrinsic store_deref (ssa_7, ssa_37) (3, 0) /* wrmask=xy */ /* access=0 
>> */
>> vec1 64 ssa_38 = deref_cast (uvec2 *)ssa_24 (function_temp uvec2)
>> vec2 32 ssa_40 = intrinsic load_deref (ssa_38) (0) /* access=0 */
>> vec1 64 ssa_41 = deref_cast (uvec2 *)ssa_3 (global uvec2)
>> vec1 64 ssa_42 = load_const (0x   1 /* 0.00 */)
>> vec1 64 ssa_43 = deref_ptr_as_array &(*ssa_41)[1] (global uvec2)
>> /* &(*(uvec2 *)ssa_3)[1] */
>> intrinsic store_deref (ssa_43, ssa_40) (3, 0) /* wrmask=xy */ /* 
>> access=0 */
>> /* succs: block_1 */
>> block block_1:
>> }
>>
>> nir_opt_copy_prop_vars
>> ## nir_copy_prop_vars_impl for __wrapped_vload2_private
>> # block0
>>
>>   vec1 64 ssa_0 = deref_var &unnamed (shader_in uint64_t)
>>   vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */
>> uint64_t unnamed: ssa_1
>>
>>   vec1 64 ssa_2 = deref_var &unnamed (shader_in uint64_t)
>>   vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */
>> uint64_t unnamed: ssa_1
>> uint64_t unnamed: ssa_3
>>
>>   vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint)
>>   vec1 64 ssa_7 = deref_cast (uve

Re: [Mesa-dev] nir_opt_copy_prop_vars doing the wrong thing

2019-05-10 Thread Jason Ekstrand
We have unit tests for that pass.  Maybe you could write one which
exercises the issue?  It'd help in debugging.

On Thu, May 9, 2019 at 8:12 PM Dave Airlie  wrote:

> I've got a bunch of cases where copy prop vars is getting things wrong
> around casts: it finds a store to a vec2 but ends up with the
> writemask staying at 0x3 while the item being stored is a single
> 64-bit value.
>
> Debug is attached below.
>
> Dave.
>
> nir_lower_memcpy_deref
> shader: MESA_SHADER_KERNEL
> local-size: 0, 0, 0 (variable)
> shared-size: 1
> inputs: 16
> outputs: 0
> uniforms: 0
> shared: 0
> decl_var shader_in INTERP_MODE_NONE uint64_t @0 (0.x, 0, 0)
> decl_var shader_in INTERP_MODE_NONE uint64_t @1 (1.x, 8, 0)
> decl_function __wrapped_vload2_private (0 params)
>
> impl __wrapped_vload2_private {
> decl_var  INTERP_MODE_NONE uint[3] @2
> block block_0:
> /* preds: */
> vec1 64 ssa_0 = deref_var &@0 (shader_in uint64_t)
> vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */
> vec1 64 ssa_2 = deref_var &@1 (shader_in uint64_t)
> vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */
> vec1 64 ssa_4 = load_const (0x   0 /* 0.00 */)
> vec1 64 ssa_5 = load_const (0x   0 /* 0.00 */)
> vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint)
> vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2)
> vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */
> vec1 64 ssa_9 = deref_var &@2 (function_temp uint[3])
> vec1 64 ssa_10 = deref_cast (uint[3] *)ssa_9 (function_temp uint[3])
> vec1 64 ssa_11 = load_const (0x   0 /* 0.00 */)
> vec1 64 ssa_13 = load_const (0x   0 /* 0.00 */)
> vec1 64 ssa_14 = deref_array &(*ssa_9)[0] (function_temp uint) /*
> &@2[0] */
> intrinsic store_deref (ssa_14, ssa_8) (1, 0) /* wrmask=x */ /*
> access=0 */
> vec1 64 ssa_15 = deref_cast (uint *)ssa_1 (global uint)
> vec1 64 ssa_16 = load_const (0x   1 /* 0.00 */)
> vec1 64 ssa_17 = deref_ptr_as_array &(*ssa_15)[1] (global uint) /*
> &(*(uint *)ssa_1)[1] */
> vec1 32 ssa_18 = intrinsic load_deref (ssa_17) (0) /* access=0 */
> vec1 64 ssa_19 = deref_var &@2 (function_temp uint[3])
> vec1 64 ssa_20 = deref_cast (uint[3] *)ssa_19 (function_temp uint[3])
> vec1 64 ssa_21 = load_const (0x   0 /* 0.00 */)
> vec1 64 ssa_23 = load_const (0x   1 /* 0.00 */)
> vec1 64 ssa_24 = deref_array &(*ssa_19)[1] (function_temp uint) /*
> &@2[1] */
> intrinsic store_deref (ssa_24, ssa_18) (1, 0) /* wrmask=x */ /*
> access=0 */
> vec1 64 ssa_25 = deref_cast (uint *)ssa_1 (global uint)
> vec1 64 ssa_26 = load_const (0x   2 /* 0.00 */)
> vec1 64 ssa_27 = deref_ptr_as_array &(*ssa_25)[2] (global uint) /*
> &(*(uint *)ssa_1)[2] */
> vec1 32 ssa_28 = intrinsic load_deref (ssa_27) (0) /* access=0 */
> vec1 64 ssa_29 = deref_var &@2 (function_temp uint[3])
> vec1 64 ssa_30 = deref_cast (uint[3] *)ssa_29 (function_temp uint[3])
> vec1 64 ssa_31 = load_const (0x   0 /* 0.00 */)
> vec1 64 ssa_33 = load_const (0x   2 /* 0.00 */)
> vec1 64 ssa_34 = deref_array &(*ssa_29)[2] (function_temp uint) /*
> &@2[2] */
> intrinsic store_deref (ssa_34, ssa_28) (1, 0) /* wrmask=x */ /*
> access=0 */
> vec1 64 ssa_35 = deref_cast (uvec2 *)ssa_14 (function_temp uvec2)
> vec2 32 ssa_37 = intrinsic load_deref (ssa_35) (0) /* access=0 */
> intrinsic store_deref (ssa_7, ssa_37) (3, 0) /* wrmask=xy */ /*
> access=0 */
> vec1 64 ssa_38 = deref_cast (uvec2 *)ssa_24 (function_temp uvec2)
> vec2 32 ssa_40 = intrinsic load_deref (ssa_38) (0) /* access=0 */
> vec1 64 ssa_41 = deref_cast (uvec2 *)ssa_3 (global uvec2)
> vec1 64 ssa_42 = load_const (0x   1 /* 0.00 */)
> vec1 64 ssa_43 = deref_ptr_as_array &(*ssa_41)[1] (global uvec2)
> /* &(*(uvec2 *)ssa_3)[1] */
> intrinsic store_deref (ssa_43, ssa_40) (3, 0) /* wrmask=xy */ /*
> access=0 */
> /* succs: block_1 */
> block block_1:
> }
>
> nir_opt_copy_prop_vars
> ## nir_copy_prop_vars_impl for __wrapped_vload2_private
> # block0
>
>   vec1 64 ssa_0 = deref_var &unnamed (shader_in uint64_t)
>   vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */
> uint64_t unnamed: ssa_1
>
>   vec1 64 ssa_2 = deref_var &unnamed (shader_in uint64_t)
>   vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */
> uint64_t unnamed: ssa_1
> uint64_t unnamed: ssa_3
>
>   vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint)
>   vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2)
>   vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */
> uint64_t unnamed: ssa_1
> uint64_t unnamed: ssa_3
> uint (uint *)ssa_1: ssa_8
>
>   vec1 64 ssa_9 = deref_var &unnamed (function_temp uint[3])
>   vec1 64 ssa_10 = deref_cast (uint[

[Mesa-dev] nir_opt_copy_prop_vars doing the wrong thing

2019-05-09 Thread Dave Airlie
I've got a bunch of cases where copy prop vars is getting things wrong
around casts: it finds a store to a vec2 but ends up with the
writemask staying at 0x3 while the item being stored is a single
64-bit value.

Debug is attached below.

Dave.

nir_lower_memcpy_deref
shader: MESA_SHADER_KERNEL
local-size: 0, 0, 0 (variable)
shared-size: 1
inputs: 16
outputs: 0
uniforms: 0
shared: 0
decl_var shader_in INTERP_MODE_NONE uint64_t @0 (0.x, 0, 0)
decl_var shader_in INTERP_MODE_NONE uint64_t @1 (1.x, 8, 0)
decl_function __wrapped_vload2_private (0 params)

impl __wrapped_vload2_private {
decl_var  INTERP_MODE_NONE uint[3] @2
block block_0:
/* preds: */
vec1 64 ssa_0 = deref_var &@0 (shader_in uint64_t)
vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */
vec1 64 ssa_2 = deref_var &@1 (shader_in uint64_t)
vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */
vec1 64 ssa_4 = load_const (0x   0 /* 0.00 */)
vec1 64 ssa_5 = load_const (0x   0 /* 0.00 */)
vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint)
vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2)
vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */
vec1 64 ssa_9 = deref_var &@2 (function_temp uint[3])
vec1 64 ssa_10 = deref_cast (uint[3] *)ssa_9 (function_temp uint[3])
vec1 64 ssa_11 = load_const (0x   0 /* 0.00 */)
vec1 64 ssa_13 = load_const (0x   0 /* 0.00 */)
vec1 64 ssa_14 = deref_array &(*ssa_9)[0] (function_temp uint) /* &@2[0] */
intrinsic store_deref (ssa_14, ssa_8) (1, 0) /* wrmask=x */ /* access=0 */
vec1 64 ssa_15 = deref_cast (uint *)ssa_1 (global uint)
vec1 64 ssa_16 = load_const (0x   1 /* 0.00 */)
vec1 64 ssa_17 = deref_ptr_as_array &(*ssa_15)[1] (global uint) /*
&(*(uint *)ssa_1)[1] */
vec1 32 ssa_18 = intrinsic load_deref (ssa_17) (0) /* access=0 */
vec1 64 ssa_19 = deref_var &@2 (function_temp uint[3])
vec1 64 ssa_20 = deref_cast (uint[3] *)ssa_19 (function_temp uint[3])
vec1 64 ssa_21 = load_const (0x   0 /* 0.00 */)
vec1 64 ssa_23 = load_const (0x   1 /* 0.00 */)
vec1 64 ssa_24 = deref_array &(*ssa_19)[1] (function_temp uint) /* &@2[1] */
intrinsic store_deref (ssa_24, ssa_18) (1, 0) /* wrmask=x */ /* access=0 */
vec1 64 ssa_25 = deref_cast (uint *)ssa_1 (global uint)
vec1 64 ssa_26 = load_const (0x   2 /* 0.00 */)
vec1 64 ssa_27 = deref_ptr_as_array &(*ssa_25)[2] (global uint) /*
&(*(uint *)ssa_1)[2] */
vec1 32 ssa_28 = intrinsic load_deref (ssa_27) (0) /* access=0 */
vec1 64 ssa_29 = deref_var &@2 (function_temp uint[3])
vec1 64 ssa_30 = deref_cast (uint[3] *)ssa_29 (function_temp uint[3])
vec1 64 ssa_31 = load_const (0x   0 /* 0.00 */)
vec1 64 ssa_33 = load_const (0x   2 /* 0.00 */)
vec1 64 ssa_34 = deref_array &(*ssa_29)[2] (function_temp uint) /* &@2[2] */
intrinsic store_deref (ssa_34, ssa_28) (1, 0) /* wrmask=x */ /* access=0 */
vec1 64 ssa_35 = deref_cast (uvec2 *)ssa_14 (function_temp uvec2)
vec2 32 ssa_37 = intrinsic load_deref (ssa_35) (0) /* access=0 */
intrinsic store_deref (ssa_7, ssa_37) (3, 0) /* wrmask=xy */ /* access=0 */
vec1 64 ssa_38 = deref_cast (uvec2 *)ssa_24 (function_temp uvec2)
vec2 32 ssa_40 = intrinsic load_deref (ssa_38) (0) /* access=0 */
vec1 64 ssa_41 = deref_cast (uvec2 *)ssa_3 (global uvec2)
vec1 64 ssa_42 = load_const (0x   1 /* 0.00 */)
vec1 64 ssa_43 = deref_ptr_as_array &(*ssa_41)[1] (global uvec2)
/* &(*(uvec2 *)ssa_3)[1] */
intrinsic store_deref (ssa_43, ssa_40) (3, 0) /* wrmask=xy */ /* access=0 */
/* succs: block_1 */
block block_1:
}

nir_opt_copy_prop_vars
## nir_copy_prop_vars_impl for __wrapped_vload2_private
# block0

  vec1 64 ssa_0 = deref_var &unnamed (shader_in uint64_t)
  vec1 64 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */
uint64_t unnamed: ssa_1

  vec1 64 ssa_2 = deref_var &unnamed (shader_in uint64_t)
  vec1 64 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */
uint64_t unnamed: ssa_1
uint64_t unnamed: ssa_3

  vec1 64 ssa_6 = deref_cast (uint *)ssa_1 (global uint)
  vec1 64 ssa_7 = deref_cast (uvec2 *)ssa_3 (global uvec2)
  vec1 32 ssa_8 = intrinsic load_deref (ssa_6) (0) /* access=0 */
uint64_t unnamed: ssa_1
uint64_t unnamed: ssa_3
uint (uint *)ssa_1: ssa_8

  vec1 64 ssa_9 = deref_var &unnamed (function_temp uint[3])
  vec1 64 ssa_10 = deref_cast (uint[3] *)ssa_9 (function_temp uint[3])
  vec1 64 ssa_14 = deref_array &(*ssa_9)[0] (function_temp uint) /*
&unnamed[0] */
  intrinsic store_deref (ssa_14, ssa_8) (1, 0) /* wrmask=x */ /* access=0 */
uint64_t unnamed: ssa_1
uint64_t unnamed: ssa_3
uint (uint *)ssa_1: ssa_8
uint unnamed[0]: ssa_8

  vec1 64 ssa_15 = deref_cast (uint *)ssa_1