Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot

2017-07-17 Thread Kenneth Graunke
On Tuesday, July 11, 2017 6:02:30 PM PDT Matt Turner wrote:
> On Mon, Jul 10, 2017 at 4:05 PM, Connor Abbott  wrote:
> > On Mon, Jul 10, 2017 at 3:50 PM, Matt Turner  wrote:
> >> On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott  wrote:
> >>> On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner  wrote:
>  We already had a channel_num system value, which I'm renaming to
>  subgroup_invocation to match the rest of the new system values.
> 
>  Note that while ballotARB(true) will return zeros in the high 32-bits on
>  systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
>  variables do not consider whether channels are enabled. See issue (1) of
>  ARB_shader_ballot.
>  ---
>   src/compiler/nir/nir.c |  4 
>   src/compiler/nir/nir_intrinsics.h  |  8 +++-
>   src/compiler/nir/nir_lower_system_values.c | 28 
>  
>   src/intel/compiler/brw_fs_nir.cpp  |  2 +-
>   src/intel/compiler/brw_nir_intrinsics.c|  4 ++--
>   5 files changed, 42 insertions(+), 4 deletions(-)
> 
>  diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
>  index 491b908396..9827e129ca 100644
>  --- a/src/compiler/nir/nir.c
>  +++ b/src/compiler/nir/nir.c
>  @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value 
>  val)
> return nir_intrinsic_load_helper_invocation;
>  case SYSTEM_VALUE_VIEW_INDEX:
> return nir_intrinsic_load_view_index;
>  +   case SYSTEM_VALUE_SUBGROUP_SIZE:
>  +  return nir_intrinsic_load_subgroup_size;
>  +   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
>  +  return nir_intrinsic_load_subgroup_invocation;
>  default:
> unreachable("system value does not directly correspond to 
>  intrinsic");
>  }
>  diff --git a/src/compiler/nir/nir_intrinsics.h 
>  b/src/compiler/nir/nir_intrinsics.h
>  index 6c6ba4cf59..96ecfbc338 100644
>  --- a/src/compiler/nir/nir_intrinsics.h
>  +++ b/src/compiler/nir/nir_intrinsics.h
>  @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
>   SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
>   SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
>   SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
>  -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
>   SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
>   SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
>   SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
>  +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
>  +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
>  +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
>  +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
>  +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
>  +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
>  +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
> 
>   /* Blend constant color values.  Float values are clamped. */
>   SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
>  diff --git a/src/compiler/nir/nir_lower_system_values.c 
>  b/src/compiler/nir/nir_lower_system_values.c
>  index 810100a081..faf0c3c9da 100644
>  --- a/src/compiler/nir/nir_lower_system_values.c
>  +++ b/src/compiler/nir/nir_lower_system_values.c
>  @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b)
>  nir_load_base_instance(b));
>    break;
> 
>  +  case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
>  +  case SYSTEM_VALUE_SUBGROUP_GE_MASK:
>  +  case SYSTEM_VALUE_SUBGROUP_GT_MASK:
>  +  case SYSTEM_VALUE_SUBGROUP_LE_MASK:
>  +  case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
>  + nir_ssa_def *count = nir_load_subgroup_invocation(b);
>  +
>  + switch (var->data.location) {
>  + case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
>  +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count);
>  +break;
>  + case SYSTEM_VALUE_SUBGROUP_GE_MASK:
>  +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count);
>  +break;
>  + case SYSTEM_VALUE_SUBGROUP_GT_MASK:
>  +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count);
>  +break;
>  + case SYSTEM_VALUE_SUBGROUP_LE_MASK:
>  +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), 
>  count));
>  +break;
>  + case SYSTEM_VALUE_SUBGROUP_LT_MASK:
>  +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), 
>  count));
>  +break;
>  + default:
>  +unreachable("you seriously can't tell this is 
>  unreachable?");
>  + }
>  +  }
>  +
> >>>
> >>> 

Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot

2017-07-11 Thread Connor Abbott
On Tue, Jul 11, 2017 at 6:02 PM, Matt Turner  wrote:
> On Mon, Jul 10, 2017 at 4:05 PM, Connor Abbott  wrote:
>> On Mon, Jul 10, 2017 at 3:50 PM, Matt Turner  wrote:
>>> On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott  wrote:
 On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner  wrote:
> We already had a channel_num system value, which I'm renaming to
> subgroup_invocation to match the rest of the new system values.
>
> Note that while ballotARB(true) will return zeros in the high 32-bits on
> systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
> variables do not consider whether channels are enabled. See issue (1) of
> ARB_shader_ballot.
> ---
>  src/compiler/nir/nir.c |  4 
>  src/compiler/nir/nir_intrinsics.h  |  8 +++-
>  src/compiler/nir/nir_lower_system_values.c | 28 
> 
>  src/intel/compiler/brw_fs_nir.cpp  |  2 +-
>  src/intel/compiler/brw_nir_intrinsics.c|  4 ++--
>  5 files changed, 42 insertions(+), 4 deletions(-)
>
> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
> index 491b908396..9827e129ca 100644
> --- a/src/compiler/nir/nir.c
> +++ b/src/compiler/nir/nir.c
> @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value 
> val)
>return nir_intrinsic_load_helper_invocation;
> case SYSTEM_VALUE_VIEW_INDEX:
>return nir_intrinsic_load_view_index;
> +   case SYSTEM_VALUE_SUBGROUP_SIZE:
> +  return nir_intrinsic_load_subgroup_size;
> +   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
> +  return nir_intrinsic_load_subgroup_invocation;
> default:
>unreachable("system value does not directly correspond to 
> intrinsic");
> }
> diff --git a/src/compiler/nir/nir_intrinsics.h 
> b/src/compiler/nir/nir_intrinsics.h
> index 6c6ba4cf59..96ecfbc338 100644
> --- a/src/compiler/nir/nir_intrinsics.h
> +++ b/src/compiler/nir/nir_intrinsics.h
> @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
>  SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
>  SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
>  SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
> -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
>
>  /* Blend constant color values.  Float values are clamped. */
>  SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
> diff --git a/src/compiler/nir/nir_lower_system_values.c 
> b/src/compiler/nir/nir_lower_system_values.c
> index 810100a081..faf0c3c9da 100644
> --- a/src/compiler/nir/nir_lower_system_values.c
> +++ b/src/compiler/nir/nir_lower_system_values.c
> @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b)
> nir_load_base_instance(b));
>   break;
>
> +  case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
> +  case SYSTEM_VALUE_SUBGROUP_GE_MASK:
> +  case SYSTEM_VALUE_SUBGROUP_GT_MASK:
> +  case SYSTEM_VALUE_SUBGROUP_LE_MASK:
> +  case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
> + nir_ssa_def *count = nir_load_subgroup_invocation(b);
> +
> + switch (var->data.location) {
> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
> +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count);
> +break;
> + case SYSTEM_VALUE_SUBGROUP_GE_MASK:
> +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count);
> +break;
> + case SYSTEM_VALUE_SUBGROUP_GT_MASK:
> +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count);
> +break;
> + case SYSTEM_VALUE_SUBGROUP_LE_MASK:
> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), 
> count));
> +break;
> + case SYSTEM_VALUE_SUBGROUP_LT_MASK:
> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), 
> count));
> +break;
> + default:
> +unreachable("you seriously can't tell this is unreachable?");
> + }
> +  }
> +

 While this fine to do for both Intel and AMD, Nvidia actually has
 special system values for 

Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot

2017-07-11 Thread Matt Turner
On Mon, Jul 10, 2017 at 4:05 PM, Connor Abbott  wrote:
> On Mon, Jul 10, 2017 at 3:50 PM, Matt Turner  wrote:
>> On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott  wrote:
>>> On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner  wrote:
 We already had a channel_num system value, which I'm renaming to
 subgroup_invocation to match the rest of the new system values.

 Note that while ballotARB(true) will return zeros in the high 32-bits on
 systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
 variables do not consider whether channels are enabled. See issue (1) of
 ARB_shader_ballot.
 ---
  src/compiler/nir/nir.c |  4 
  src/compiler/nir/nir_intrinsics.h  |  8 +++-
  src/compiler/nir/nir_lower_system_values.c | 28 
 
  src/intel/compiler/brw_fs_nir.cpp  |  2 +-
  src/intel/compiler/brw_nir_intrinsics.c|  4 ++--
  5 files changed, 42 insertions(+), 4 deletions(-)

 diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
 index 491b908396..9827e129ca 100644
 --- a/src/compiler/nir/nir.c
 +++ b/src/compiler/nir/nir.c
 @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val)
return nir_intrinsic_load_helper_invocation;
 case SYSTEM_VALUE_VIEW_INDEX:
return nir_intrinsic_load_view_index;
 +   case SYSTEM_VALUE_SUBGROUP_SIZE:
 +  return nir_intrinsic_load_subgroup_size;
 +   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
 +  return nir_intrinsic_load_subgroup_invocation;
 default:
unreachable("system value does not directly correspond to 
 intrinsic");
 }
 diff --git a/src/compiler/nir/nir_intrinsics.h 
 b/src/compiler/nir/nir_intrinsics.h
 index 6c6ba4cf59..96ecfbc338 100644
 --- a/src/compiler/nir/nir_intrinsics.h
 +++ b/src/compiler/nir/nir_intrinsics.h
 @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
  SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
  SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
  SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
 -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
  SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
  SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
  SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
 +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
 +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
 +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
 +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
 +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
 +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
 +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)

  /* Blend constant color values.  Float values are clamped. */
  SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
 diff --git a/src/compiler/nir/nir_lower_system_values.c 
 b/src/compiler/nir/nir_lower_system_values.c
 index 810100a081..faf0c3c9da 100644
 --- a/src/compiler/nir/nir_lower_system_values.c
 +++ b/src/compiler/nir/nir_lower_system_values.c
 @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b)
 nir_load_base_instance(b));
   break;

 +  case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
 +  case SYSTEM_VALUE_SUBGROUP_GE_MASK:
 +  case SYSTEM_VALUE_SUBGROUP_GT_MASK:
 +  case SYSTEM_VALUE_SUBGROUP_LE_MASK:
 +  case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
 + nir_ssa_def *count = nir_load_subgroup_invocation(b);
 +
 + switch (var->data.location) {
 + case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
 +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count);
 +break;
 + case SYSTEM_VALUE_SUBGROUP_GE_MASK:
 +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count);
 +break;
 + case SYSTEM_VALUE_SUBGROUP_GT_MASK:
 +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count);
 +break;
 + case SYSTEM_VALUE_SUBGROUP_LE_MASK:
 +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), 
 count));
 +break;
 + case SYSTEM_VALUE_SUBGROUP_LT_MASK:
 +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), 
 count));
 +break;
 + default:
 +unreachable("you seriously can't tell this is unreachable?");
 + }
 +  }
 +
>>>
>>> While this fine to do for both Intel and AMD, Nvidia actually has
>>> special system values for these, and AMD has special instructions for
>>> bitCount(foo & gl_SubGroupLtMask), so I think we should have actual
>>
>> So, just add this to the above switch statement?
>>
>>if 

Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot

2017-07-10 Thread Connor Abbott
On Mon, Jul 10, 2017 at 3:50 PM, Matt Turner  wrote:
> On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott  wrote:
>> On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner  wrote:
>>> We already had a channel_num system value, which I'm renaming to
>>> subgroup_invocation to match the rest of the new system values.
>>>
>>> Note that while ballotARB(true) will return zeros in the high 32-bits on
>>> systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
>>> variables do not consider whether channels are enabled. See issue (1) of
>>> ARB_shader_ballot.
>>> ---
>>>  src/compiler/nir/nir.c |  4 
>>>  src/compiler/nir/nir_intrinsics.h  |  8 +++-
>>>  src/compiler/nir/nir_lower_system_values.c | 28 
>>> 
>>>  src/intel/compiler/brw_fs_nir.cpp  |  2 +-
>>>  src/intel/compiler/brw_nir_intrinsics.c|  4 ++--
>>>  5 files changed, 42 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
>>> index 491b908396..9827e129ca 100644
>>> --- a/src/compiler/nir/nir.c
>>> +++ b/src/compiler/nir/nir.c
>>> @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val)
>>>return nir_intrinsic_load_helper_invocation;
>>> case SYSTEM_VALUE_VIEW_INDEX:
>>>return nir_intrinsic_load_view_index;
>>> +   case SYSTEM_VALUE_SUBGROUP_SIZE:
>>> +  return nir_intrinsic_load_subgroup_size;
>>> +   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
>>> +  return nir_intrinsic_load_subgroup_invocation;
>>> default:
>>>unreachable("system value does not directly correspond to 
>>> intrinsic");
>>> }
>>> diff --git a/src/compiler/nir/nir_intrinsics.h 
>>> b/src/compiler/nir/nir_intrinsics.h
>>> index 6c6ba4cf59..96ecfbc338 100644
>>> --- a/src/compiler/nir/nir_intrinsics.h
>>> +++ b/src/compiler/nir/nir_intrinsics.h
>>> @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
>>>  SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
>>>  SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
>>>  SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
>>> -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
>>>  SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
>>>  SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
>>>  SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
>>> +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
>>> +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
>>> +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
>>> +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
>>> +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
>>> +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
>>> +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
>>>
>>>  /* Blend constant color values.  Float values are clamped. */
>>>  SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
>>> diff --git a/src/compiler/nir/nir_lower_system_values.c 
>>> b/src/compiler/nir/nir_lower_system_values.c
>>> index 810100a081..faf0c3c9da 100644
>>> --- a/src/compiler/nir/nir_lower_system_values.c
>>> +++ b/src/compiler/nir/nir_lower_system_values.c
>>> @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b)
>>> nir_load_base_instance(b));
>>>   break;
>>>
>>> +  case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
>>> +  case SYSTEM_VALUE_SUBGROUP_GE_MASK:
>>> +  case SYSTEM_VALUE_SUBGROUP_GT_MASK:
>>> +  case SYSTEM_VALUE_SUBGROUP_LE_MASK:
>>> +  case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
>>> + nir_ssa_def *count = nir_load_subgroup_invocation(b);
>>> +
>>> + switch (var->data.location) {
>>> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
>>> +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count);
>>> +break;
>>> + case SYSTEM_VALUE_SUBGROUP_GE_MASK:
>>> +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count);
>>> +break;
>>> + case SYSTEM_VALUE_SUBGROUP_GT_MASK:
>>> +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count);
>>> +break;
>>> + case SYSTEM_VALUE_SUBGROUP_LE_MASK:
>>> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), 
>>> count));
>>> +break;
>>> + case SYSTEM_VALUE_SUBGROUP_LT_MASK:
>>> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), 
>>> count));
>>> +break;
>>> + default:
>>> +unreachable("you seriously can't tell this is unreachable?");
>>> + }
>>> +  }
>>> +
>>
>> While this fine to do for both Intel and AMD, Nvidia actually has
>> special system values for these, and AMD has special instructions for
>> bitCount(foo & gl_SubGroupLtMask), so I think we should have actual
>
> So, just add this to the above switch statement?
>
>if (!b->shader->options->lower_subgroup_masks)
>   break;
>
> I'll also add the missing cases to nir_intrinsic_from_system_value()
> and nir_system_value_from_intrinsic().

Well, 

Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot

2017-07-10 Thread Matt Turner
On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott  wrote:
> On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner  wrote:
>> We already had a channel_num system value, which I'm renaming to
>> subgroup_invocation to match the rest of the new system values.
>>
>> Note that while ballotARB(true) will return zeros in the high 32-bits on
>> systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
>> variables do not consider whether channels are enabled. See issue (1) of
>> ARB_shader_ballot.
>> ---
>>  src/compiler/nir/nir.c |  4 
>>  src/compiler/nir/nir_intrinsics.h  |  8 +++-
>>  src/compiler/nir/nir_lower_system_values.c | 28 
>>  src/intel/compiler/brw_fs_nir.cpp  |  2 +-
>>  src/intel/compiler/brw_nir_intrinsics.c|  4 ++--
>>  5 files changed, 42 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
>> index 491b908396..9827e129ca 100644
>> --- a/src/compiler/nir/nir.c
>> +++ b/src/compiler/nir/nir.c
>> @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val)
>>return nir_intrinsic_load_helper_invocation;
>> case SYSTEM_VALUE_VIEW_INDEX:
>>return nir_intrinsic_load_view_index;
>> +   case SYSTEM_VALUE_SUBGROUP_SIZE:
>> +  return nir_intrinsic_load_subgroup_size;
>> +   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
>> +  return nir_intrinsic_load_subgroup_invocation;
>> default:
>>unreachable("system value does not directly correspond to intrinsic");
>> }
>> diff --git a/src/compiler/nir/nir_intrinsics.h 
>> b/src/compiler/nir/nir_intrinsics.h
>> index 6c6ba4cf59..96ecfbc338 100644
>> --- a/src/compiler/nir/nir_intrinsics.h
>> +++ b/src/compiler/nir/nir_intrinsics.h
>> @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
>>  SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
>>  SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
>>  SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
>> -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
>>  SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
>>  SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
>>  SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
>> +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
>> +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
>> +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
>> +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
>> +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
>> +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
>> +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
>>
>>  /* Blend constant color values.  Float values are clamped. */
>>  SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
>> diff --git a/src/compiler/nir/nir_lower_system_values.c 
>> b/src/compiler/nir/nir_lower_system_values.c
>> index 810100a081..faf0c3c9da 100644
>> --- a/src/compiler/nir/nir_lower_system_values.c
>> +++ b/src/compiler/nir/nir_lower_system_values.c
>> @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b)
>> nir_load_base_instance(b));
>>   break;
>>
>> +  case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
>> +  case SYSTEM_VALUE_SUBGROUP_GE_MASK:
>> +  case SYSTEM_VALUE_SUBGROUP_GT_MASK:
>> +  case SYSTEM_VALUE_SUBGROUP_LE_MASK:
>> +  case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
>> + nir_ssa_def *count = nir_load_subgroup_invocation(b);
>> +
>> + switch (var->data.location) {
>> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
>> +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count);
>> +break;
>> + case SYSTEM_VALUE_SUBGROUP_GE_MASK:
>> +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count);
>> +break;
>> + case SYSTEM_VALUE_SUBGROUP_GT_MASK:
>> +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count);
>> +break;
>> + case SYSTEM_VALUE_SUBGROUP_LE_MASK:
>> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), 
>> count));
>> +break;
>> + case SYSTEM_VALUE_SUBGROUP_LT_MASK:
>> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), 
>> count));
>> +break;
>> + default:
>> +unreachable("you seriously can't tell this is unreachable?");
>> + }
>> +  }
>> +
>
> While this fine to do for both Intel and AMD, Nvidia actually has
> special system values for these, and AMD has special instructions for
> bitCount(foo & gl_SubGroupLtMask), so I think we should have actual

So, just add this to the above switch statement?

   if (!b->shader->options->lower_subgroup_masks)
  break;

I'll also add the missing cases to nir_intrinsic_from_system_value()
and nir_system_value_from_intrinsic().

> nir_load_subgroup_*_mask intrinsics for these. Also, that way you can
> use the same shrinking logic to turn these into 32-bit shifts on
> Intel.

The channel liveness doesn't affect SubGroup*Mask. 

Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot

2017-07-10 Thread Connor Abbott
On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner  wrote:
> We already had a channel_num system value, which I'm renaming to
> subgroup_invocation to match the rest of the new system values.
>
> Note that while ballotARB(true) will return zeros in the high 32-bits on
> systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
> variables do not consider whether channels are enabled. See issue (1) of
> ARB_shader_ballot.
> ---
>  src/compiler/nir/nir.c |  4 
>  src/compiler/nir/nir_intrinsics.h  |  8 +++-
>  src/compiler/nir/nir_lower_system_values.c | 28 
>  src/intel/compiler/brw_fs_nir.cpp  |  2 +-
>  src/intel/compiler/brw_nir_intrinsics.c|  4 ++--
>  5 files changed, 42 insertions(+), 4 deletions(-)
>
> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
> index 491b908396..9827e129ca 100644
> --- a/src/compiler/nir/nir.c
> +++ b/src/compiler/nir/nir.c
> @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val)
>return nir_intrinsic_load_helper_invocation;
> case SYSTEM_VALUE_VIEW_INDEX:
>return nir_intrinsic_load_view_index;
> +   case SYSTEM_VALUE_SUBGROUP_SIZE:
> +  return nir_intrinsic_load_subgroup_size;
> +   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
> +  return nir_intrinsic_load_subgroup_invocation;
> default:
>unreachable("system value does not directly correspond to intrinsic");
> }
> diff --git a/src/compiler/nir/nir_intrinsics.h 
> b/src/compiler/nir/nir_intrinsics.h
> index 6c6ba4cf59..96ecfbc338 100644
> --- a/src/compiler/nir/nir_intrinsics.h
> +++ b/src/compiler/nir/nir_intrinsics.h
> @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
>  SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
>  SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
>  SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
> -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
>
>  /* Blend constant color values.  Float values are clamped. */
>  SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
> diff --git a/src/compiler/nir/nir_lower_system_values.c 
> b/src/compiler/nir/nir_lower_system_values.c
> index 810100a081..faf0c3c9da 100644
> --- a/src/compiler/nir/nir_lower_system_values.c
> +++ b/src/compiler/nir/nir_lower_system_values.c
> @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b)
> nir_load_base_instance(b));
>   break;
>
> +  case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
> +  case SYSTEM_VALUE_SUBGROUP_GE_MASK:
> +  case SYSTEM_VALUE_SUBGROUP_GT_MASK:
> +  case SYSTEM_VALUE_SUBGROUP_LE_MASK:
> +  case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
> + nir_ssa_def *count = nir_load_subgroup_invocation(b);
> +
> + switch (var->data.location) {
> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
> +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count);
> +break;
> + case SYSTEM_VALUE_SUBGROUP_GE_MASK:
> +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count);
> +break;
> + case SYSTEM_VALUE_SUBGROUP_GT_MASK:
> +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count);
> +break;
> + case SYSTEM_VALUE_SUBGROUP_LE_MASK:
> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), 
> count));
> +break;
> + case SYSTEM_VALUE_SUBGROUP_LT_MASK:
> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), 
> count));
> +break;
> + default:
> +unreachable("you seriously can't tell this is unreachable?");
> + }
> +  }
> +

While this fine to do for both Intel and AMD, Nvidia actually has
special system values for these, and AMD has special instructions for
bitCount(foo & gl_SubGroupLtMask), so I think we should have actual
nir_load_subgroup_*_mask intrinsics for these. Also, that way you can
use the same shrinking logic to turn these into 32-bit shifts on
Intel.

>default:
>   break;
>}
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index 264398f38e..17f35e081d 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -4075,7 +4075,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
> nir_intrinsic_instr *instr
>break;
> }
>
> -   case nir_intrinsic_load_channel_num: {
> +   case 

[Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot

2017-07-06 Thread Matt Turner
We already had a channel_num system value, which I'm renaming to
subgroup_invocation to match the rest of the new system values.

Note that while ballotARB(true) will return zeros in the high 32-bits on
systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
variables do not consider whether channels are enabled. See issue (1) of
ARB_shader_ballot.
---
 src/compiler/nir/nir.c |  4 
 src/compiler/nir/nir_intrinsics.h  |  8 +++-
 src/compiler/nir/nir_lower_system_values.c | 28 
 src/intel/compiler/brw_fs_nir.cpp  |  2 +-
 src/intel/compiler/brw_nir_intrinsics.c|  4 ++--
 5 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 491b908396..9827e129ca 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val)
   return nir_intrinsic_load_helper_invocation;
case SYSTEM_VALUE_VIEW_INDEX:
   return nir_intrinsic_load_view_index;
+   case SYSTEM_VALUE_SUBGROUP_SIZE:
+  return nir_intrinsic_load_subgroup_size;
+   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
+  return nir_intrinsic_load_subgroup_invocation;
default:
   unreachable("system value does not directly correspond to intrinsic");
}
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 6c6ba4cf59..96ecfbc338 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
 SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
 
 /* Blend constant color values.  Float values are clamped. */
 SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index 810100a081..faf0c3c9da 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b)
nir_load_base_instance(b));
  break;
 
+  case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
+  case SYSTEM_VALUE_SUBGROUP_GE_MASK:
+  case SYSTEM_VALUE_SUBGROUP_GT_MASK:
+  case SYSTEM_VALUE_SUBGROUP_LE_MASK:
+  case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
+ nir_ssa_def *count = nir_load_subgroup_invocation(b);
+
+ switch (var->data.location) {
+ case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
+sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count);
+break;
+ case SYSTEM_VALUE_SUBGROUP_GE_MASK:
+sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count);
+break;
+ case SYSTEM_VALUE_SUBGROUP_GT_MASK:
+sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count);
+break;
+ case SYSTEM_VALUE_SUBGROUP_LE_MASK:
+sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count));
+break;
+ case SYSTEM_VALUE_SUBGROUP_LT_MASK:
+sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count));
+break;
+ default:
+unreachable("you seriously can't tell this is unreachable?");
+ }
+  }
+
   default:
  break;
   }
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 264398f38e..17f35e081d 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4075,7 +4075,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   break;
}
 
-   case nir_intrinsic_load_channel_num: {
+   case nir_intrinsic_load_subgroup_invocation: {
   fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
   dest = retype(dest, BRW_REGISTER_TYPE_UD);
   const fs_builder allbld8 = bld.group(8, 0).exec_all();
diff --git a/src/intel/compiler/brw_nir_intrinsics.c 
b/src/intel/compiler/brw_nir_intrinsics.c
index d63570fa2a..abbbc6f93e 100644
--- a/src/intel/compiler/brw_nir_intrinsics.c
+++ b/src/intel/compiler/brw_nir_intrinsics.c
@@ -88,10 +88,10 @@ lower_cs_intrinsics_convert_block(struct 
lower_intrinsics_state *state,
  /* We construct the local invocation index from:
   *
   *gl_LocalInvocationIndex =
-