Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot
On Tuesday, July 11, 2017 6:02:30 PM PDT Matt Turner wrote: > On Mon, Jul 10, 2017 at 4:05 PM, Connor Abbottwrote: > > On Mon, Jul 10, 2017 at 3:50 PM, Matt Turner wrote: > >> On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott wrote: > >>> On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner wrote: > We already had a channel_num system value, which I'm renaming to > subgroup_invocation to match the rest of the new system values. > > Note that while ballotARB(true) will return zeros in the high 32-bits on > systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB > variables do not consider whether channels are enabled. See issue (1) of > ARB_shader_ballot. > --- > src/compiler/nir/nir.c | 4 > src/compiler/nir/nir_intrinsics.h | 8 +++- > src/compiler/nir/nir_lower_system_values.c | 28 > > src/intel/compiler/brw_fs_nir.cpp | 2 +- > src/intel/compiler/brw_nir_intrinsics.c| 4 ++-- > 5 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c > index 491b908396..9827e129ca 100644 > --- a/src/compiler/nir/nir.c > +++ b/src/compiler/nir/nir.c > @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value > val) > return nir_intrinsic_load_helper_invocation; > case SYSTEM_VALUE_VIEW_INDEX: > return nir_intrinsic_load_view_index; > + case SYSTEM_VALUE_SUBGROUP_SIZE: > + return nir_intrinsic_load_subgroup_size; > + case SYSTEM_VALUE_SUBGROUP_INVOCATION: > + return nir_intrinsic_load_subgroup_invocation; > default: > unreachable("system value does not directly correspond to > intrinsic"); > } > diff --git a/src/compiler/nir/nir_intrinsics.h > b/src/compiler/nir/nir_intrinsics.h > index 6c6ba4cf59..96ecfbc338 100644 > --- a/src/compiler/nir/nir_intrinsics.h > +++ b/src/compiler/nir/nir_intrinsics.h > @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) > SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) > SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) > SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) > -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) > SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx) > SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) > SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx) > > /* Blend constant color values. Float values are clamped. */ > SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx) > diff --git a/src/compiler/nir/nir_lower_system_values.c > b/src/compiler/nir/nir_lower_system_values.c > index 810100a081..faf0c3c9da 100644 > --- a/src/compiler/nir/nir_lower_system_values.c > +++ b/src/compiler/nir/nir_lower_system_values.c > @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b) > nir_load_base_instance(b)); > break; > > + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: > + case SYSTEM_VALUE_SUBGROUP_GE_MASK: > + case SYSTEM_VALUE_SUBGROUP_GT_MASK: > + case SYSTEM_VALUE_SUBGROUP_LE_MASK: > + case SYSTEM_VALUE_SUBGROUP_LT_MASK: { > + nir_ssa_def *count = nir_load_subgroup_invocation(b); > + > + switch (var->data.location) { > + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: > +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count); > +break; > + case SYSTEM_VALUE_SUBGROUP_GE_MASK: > +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count); > +break; > + case SYSTEM_VALUE_SUBGROUP_GT_MASK: > +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count); > +break; > + case SYSTEM_VALUE_SUBGROUP_LE_MASK: > +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), > count)); > +break; > + case SYSTEM_VALUE_SUBGROUP_LT_MASK: > +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), > count)); > +break; > + default: > +unreachable("you seriously can't tell this is > unreachable?"); > + } > + } > + > >>> > >>>
Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot
On Tue, Jul 11, 2017 at 6:02 PM, Matt Turnerwrote: > On Mon, Jul 10, 2017 at 4:05 PM, Connor Abbott wrote: >> On Mon, Jul 10, 2017 at 3:50 PM, Matt Turner wrote: >>> On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott wrote: On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner wrote: > We already had a channel_num system value, which I'm renaming to > subgroup_invocation to match the rest of the new system values. > > Note that while ballotARB(true) will return zeros in the high 32-bits on > systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB > variables do not consider whether channels are enabled. See issue (1) of > ARB_shader_ballot. > --- > src/compiler/nir/nir.c | 4 > src/compiler/nir/nir_intrinsics.h | 8 +++- > src/compiler/nir/nir_lower_system_values.c | 28 > > src/intel/compiler/brw_fs_nir.cpp | 2 +- > src/intel/compiler/brw_nir_intrinsics.c| 4 ++-- > 5 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c > index 491b908396..9827e129ca 100644 > --- a/src/compiler/nir/nir.c > +++ b/src/compiler/nir/nir.c > @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value > val) >return nir_intrinsic_load_helper_invocation; > case SYSTEM_VALUE_VIEW_INDEX: >return nir_intrinsic_load_view_index; > + case SYSTEM_VALUE_SUBGROUP_SIZE: > + return nir_intrinsic_load_subgroup_size; > + case SYSTEM_VALUE_SUBGROUP_INVOCATION: > + return nir_intrinsic_load_subgroup_invocation; > default: >unreachable("system value does not directly correspond to > intrinsic"); > } > diff --git a/src/compiler/nir/nir_intrinsics.h > b/src/compiler/nir/nir_intrinsics.h > index 6c6ba4cf59..96ecfbc338 100644 > --- a/src/compiler/nir/nir_intrinsics.h > +++ b/src/compiler/nir/nir_intrinsics.h > @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) > SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) > SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) > SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) > -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) > SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx) > SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) > SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx) > > /* Blend constant color values. Float values are clamped. */ > SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx) > diff --git a/src/compiler/nir/nir_lower_system_values.c > b/src/compiler/nir/nir_lower_system_values.c > index 810100a081..faf0c3c9da 100644 > --- a/src/compiler/nir/nir_lower_system_values.c > +++ b/src/compiler/nir/nir_lower_system_values.c > @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b) > nir_load_base_instance(b)); > break; > > + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: > + case SYSTEM_VALUE_SUBGROUP_GE_MASK: > + case SYSTEM_VALUE_SUBGROUP_GT_MASK: > + case SYSTEM_VALUE_SUBGROUP_LE_MASK: > + case SYSTEM_VALUE_SUBGROUP_LT_MASK: { > + nir_ssa_def *count = nir_load_subgroup_invocation(b); > + > + switch (var->data.location) { > + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: > +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count); > +break; > + case SYSTEM_VALUE_SUBGROUP_GE_MASK: > +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count); > +break; > + case SYSTEM_VALUE_SUBGROUP_GT_MASK: > +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count); > +break; > + case SYSTEM_VALUE_SUBGROUP_LE_MASK: > +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), > count)); > +break; > + case SYSTEM_VALUE_SUBGROUP_LT_MASK: > +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), > count)); > +break; > + default: > +unreachable("you seriously can't tell this is unreachable?"); > + } > + } > + While this fine to do for both Intel and AMD, Nvidia actually has special system values for
Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot
On Mon, Jul 10, 2017 at 4:05 PM, Connor Abbottwrote: > On Mon, Jul 10, 2017 at 3:50 PM, Matt Turner wrote: >> On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott wrote: >>> On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner wrote: We already had a channel_num system value, which I'm renaming to subgroup_invocation to match the rest of the new system values. Note that while ballotARB(true) will return zeros in the high 32-bits on systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB variables do not consider whether channels are enabled. See issue (1) of ARB_shader_ballot. --- src/compiler/nir/nir.c | 4 src/compiler/nir/nir_intrinsics.h | 8 +++- src/compiler/nir/nir_lower_system_values.c | 28 src/intel/compiler/brw_fs_nir.cpp | 2 +- src/intel/compiler/brw_nir_intrinsics.c| 4 ++-- 5 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 491b908396..9827e129ca 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_helper_invocation; case SYSTEM_VALUE_VIEW_INDEX: return nir_intrinsic_load_view_index; + case SYSTEM_VALUE_SUBGROUP_SIZE: + return nir_intrinsic_load_subgroup_size; + case SYSTEM_VALUE_SUBGROUP_INVOCATION: + return nir_intrinsic_load_subgroup_invocation; default: unreachable("system value does not directly correspond to intrinsic"); } diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 6c6ba4cf59..96ecfbc338 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx) SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx) /* Blend constant color values. Float values are clamped. */ SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index 810100a081..faf0c3c9da 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b) nir_load_base_instance(b)); break; + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: + case SYSTEM_VALUE_SUBGROUP_GE_MASK: + case SYSTEM_VALUE_SUBGROUP_GT_MASK: + case SYSTEM_VALUE_SUBGROUP_LE_MASK: + case SYSTEM_VALUE_SUBGROUP_LT_MASK: { + nir_ssa_def *count = nir_load_subgroup_invocation(b); + + switch (var->data.location) { + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count); +break; + case SYSTEM_VALUE_SUBGROUP_GE_MASK: +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count); +break; + case SYSTEM_VALUE_SUBGROUP_GT_MASK: +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count); +break; + case SYSTEM_VALUE_SUBGROUP_LE_MASK: +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count)); +break; + case SYSTEM_VALUE_SUBGROUP_LT_MASK: +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count)); +break; + default: +unreachable("you seriously can't tell this is unreachable?"); + } + } + >>> >>> While this fine to do for both Intel and AMD, Nvidia actually has >>> special system values for these, and AMD has special instructions for >>> bitCount(foo & gl_SubGroupLtMask), so I think we should have actual >> >> So, just add this to the above switch statement? >> >>if
Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot
On Mon, Jul 10, 2017 at 3:50 PM, Matt Turnerwrote: > On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbott wrote: >> On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner wrote: >>> We already had a channel_num system value, which I'm renaming to >>> subgroup_invocation to match the rest of the new system values. >>> >>> Note that while ballotARB(true) will return zeros in the high 32-bits on >>> systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB >>> variables do not consider whether channels are enabled. See issue (1) of >>> ARB_shader_ballot. >>> --- >>> src/compiler/nir/nir.c | 4 >>> src/compiler/nir/nir_intrinsics.h | 8 +++- >>> src/compiler/nir/nir_lower_system_values.c | 28 >>> >>> src/intel/compiler/brw_fs_nir.cpp | 2 +- >>> src/intel/compiler/brw_nir_intrinsics.c| 4 ++-- >>> 5 files changed, 42 insertions(+), 4 deletions(-) >>> >>> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c >>> index 491b908396..9827e129ca 100644 >>> --- a/src/compiler/nir/nir.c >>> +++ b/src/compiler/nir/nir.c >>> @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val) >>>return nir_intrinsic_load_helper_invocation; >>> case SYSTEM_VALUE_VIEW_INDEX: >>>return nir_intrinsic_load_view_index; >>> + case SYSTEM_VALUE_SUBGROUP_SIZE: >>> + return nir_intrinsic_load_subgroup_size; >>> + case SYSTEM_VALUE_SUBGROUP_INVOCATION: >>> + return nir_intrinsic_load_subgroup_invocation; >>> default: >>>unreachable("system value does not directly correspond to >>> intrinsic"); >>> } >>> diff --git a/src/compiler/nir/nir_intrinsics.h >>> b/src/compiler/nir/nir_intrinsics.h >>> index 6c6ba4cf59..96ecfbc338 100644 >>> --- a/src/compiler/nir/nir_intrinsics.h >>> +++ b/src/compiler/nir/nir_intrinsics.h >>> @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) >>> SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) >>> SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) >>> SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) >>> -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) >>> SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx) >>> SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) >>> SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) >>> +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) >>> +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) >>> +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) >>> +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) >>> +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) >>> +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx) >>> +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx) >>> >>> /* Blend constant color values. Float values are clamped. */ >>> SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx) >>> diff --git a/src/compiler/nir/nir_lower_system_values.c >>> b/src/compiler/nir/nir_lower_system_values.c >>> index 810100a081..faf0c3c9da 100644 >>> --- a/src/compiler/nir/nir_lower_system_values.c >>> +++ b/src/compiler/nir/nir_lower_system_values.c >>> @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b) >>> nir_load_base_instance(b)); >>> break; >>> >>> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: >>> + case SYSTEM_VALUE_SUBGROUP_GE_MASK: >>> + case SYSTEM_VALUE_SUBGROUP_GT_MASK: >>> + case SYSTEM_VALUE_SUBGROUP_LE_MASK: >>> + case SYSTEM_VALUE_SUBGROUP_LT_MASK: { >>> + nir_ssa_def *count = nir_load_subgroup_invocation(b); >>> + >>> + switch (var->data.location) { >>> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: >>> +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count); >>> +break; >>> + case SYSTEM_VALUE_SUBGROUP_GE_MASK: >>> +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count); >>> +break; >>> + case SYSTEM_VALUE_SUBGROUP_GT_MASK: >>> +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count); >>> +break; >>> + case SYSTEM_VALUE_SUBGROUP_LE_MASK: >>> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), >>> count)); >>> +break; >>> + case SYSTEM_VALUE_SUBGROUP_LT_MASK: >>> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), >>> count)); >>> +break; >>> + default: >>> +unreachable("you seriously can't tell this is unreachable?"); >>> + } >>> + } >>> + >> >> While this fine to do for both Intel and AMD, Nvidia actually has >> special system values for these, and AMD has special instructions for >> bitCount(foo & gl_SubGroupLtMask), so I think we should have actual > > So, just add this to the above switch statement? > >if (!b->shader->options->lower_subgroup_masks) > break; > > I'll also add the missing cases to nir_intrinsic_from_system_value() > and nir_system_value_from_intrinsic(). Well,
Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot
On Mon, Jul 10, 2017 at 1:10 PM, Connor Abbottwrote: > On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner wrote: >> We already had a channel_num system value, which I'm renaming to >> subgroup_invocation to match the rest of the new system values. >> >> Note that while ballotARB(true) will return zeros in the high 32-bits on >> systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB >> variables do not consider whether channels are enabled. See issue (1) of >> ARB_shader_ballot. >> --- >> src/compiler/nir/nir.c | 4 >> src/compiler/nir/nir_intrinsics.h | 8 +++- >> src/compiler/nir/nir_lower_system_values.c | 28 >> src/intel/compiler/brw_fs_nir.cpp | 2 +- >> src/intel/compiler/brw_nir_intrinsics.c| 4 ++-- >> 5 files changed, 42 insertions(+), 4 deletions(-) >> >> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c >> index 491b908396..9827e129ca 100644 >> --- a/src/compiler/nir/nir.c >> +++ b/src/compiler/nir/nir.c >> @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val) >>return nir_intrinsic_load_helper_invocation; >> case SYSTEM_VALUE_VIEW_INDEX: >>return nir_intrinsic_load_view_index; >> + case SYSTEM_VALUE_SUBGROUP_SIZE: >> + return nir_intrinsic_load_subgroup_size; >> + case SYSTEM_VALUE_SUBGROUP_INVOCATION: >> + return nir_intrinsic_load_subgroup_invocation; >> default: >>unreachable("system value does not directly correspond to intrinsic"); >> } >> diff --git a/src/compiler/nir/nir_intrinsics.h >> b/src/compiler/nir/nir_intrinsics.h >> index 6c6ba4cf59..96ecfbc338 100644 >> --- a/src/compiler/nir/nir_intrinsics.h >> +++ b/src/compiler/nir/nir_intrinsics.h >> @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) >> SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) >> SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) >> SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) >> -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) >> SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx) >> SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) >> SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) >> +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) >> +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) >> +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) >> +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) >> +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) >> +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx) >> +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx) >> >> /* Blend constant color values. Float values are clamped. */ >> SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx) >> diff --git a/src/compiler/nir/nir_lower_system_values.c >> b/src/compiler/nir/nir_lower_system_values.c >> index 810100a081..faf0c3c9da 100644 >> --- a/src/compiler/nir/nir_lower_system_values.c >> +++ b/src/compiler/nir/nir_lower_system_values.c >> @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b) >> nir_load_base_instance(b)); >> break; >> >> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: >> + case SYSTEM_VALUE_SUBGROUP_GE_MASK: >> + case SYSTEM_VALUE_SUBGROUP_GT_MASK: >> + case SYSTEM_VALUE_SUBGROUP_LE_MASK: >> + case SYSTEM_VALUE_SUBGROUP_LT_MASK: { >> + nir_ssa_def *count = nir_load_subgroup_invocation(b); >> + >> + switch (var->data.location) { >> + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: >> +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count); >> +break; >> + case SYSTEM_VALUE_SUBGROUP_GE_MASK: >> +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count); >> +break; >> + case SYSTEM_VALUE_SUBGROUP_GT_MASK: >> +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count); >> +break; >> + case SYSTEM_VALUE_SUBGROUP_LE_MASK: >> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), >> count)); >> +break; >> + case SYSTEM_VALUE_SUBGROUP_LT_MASK: >> +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), >> count)); >> +break; >> + default: >> +unreachable("you seriously can't tell this is unreachable?"); >> + } >> + } >> + > > While this fine to do for both Intel and AMD, Nvidia actually has > special system values for these, and AMD has special instructions for > bitCount(foo & gl_SubGroupLtMask), so I think we should have actual So, just add this to the above switch statement? if (!b->shader->options->lower_subgroup_masks) break; I'll also add the missing cases to nir_intrinsic_from_system_value() and nir_system_value_from_intrinsic(). > nir_load_subgroup_*_mask intrinsics for these. Also, that way you can > use the same shrinking logic to turn these into 32-bit shifts on > Intel. The channel liveness doesn't affect SubGroup*Mask.
Re: [Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot
On Thu, Jul 6, 2017 at 4:48 PM, Matt Turnerwrote: > We already had a channel_num system value, which I'm renaming to > subgroup_invocation to match the rest of the new system values. > > Note that while ballotARB(true) will return zeros in the high 32-bits on > systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB > variables do not consider whether channels are enabled. See issue (1) of > ARB_shader_ballot. > --- > src/compiler/nir/nir.c | 4 > src/compiler/nir/nir_intrinsics.h | 8 +++- > src/compiler/nir/nir_lower_system_values.c | 28 > src/intel/compiler/brw_fs_nir.cpp | 2 +- > src/intel/compiler/brw_nir_intrinsics.c| 4 ++-- > 5 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c > index 491b908396..9827e129ca 100644 > --- a/src/compiler/nir/nir.c > +++ b/src/compiler/nir/nir.c > @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val) >return nir_intrinsic_load_helper_invocation; > case SYSTEM_VALUE_VIEW_INDEX: >return nir_intrinsic_load_view_index; > + case SYSTEM_VALUE_SUBGROUP_SIZE: > + return nir_intrinsic_load_subgroup_size; > + case SYSTEM_VALUE_SUBGROUP_INVOCATION: > + return nir_intrinsic_load_subgroup_invocation; > default: >unreachable("system value does not directly correspond to intrinsic"); > } > diff --git a/src/compiler/nir/nir_intrinsics.h > b/src/compiler/nir/nir_intrinsics.h > index 6c6ba4cf59..96ecfbc338 100644 > --- a/src/compiler/nir/nir_intrinsics.h > +++ b/src/compiler/nir/nir_intrinsics.h > @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) > SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) > SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) > SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) > -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) > SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx) > SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) > SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx) > > /* Blend constant color values. Float values are clamped. */ > SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx) > diff --git a/src/compiler/nir/nir_lower_system_values.c > b/src/compiler/nir/nir_lower_system_values.c > index 810100a081..faf0c3c9da 100644 > --- a/src/compiler/nir/nir_lower_system_values.c > +++ b/src/compiler/nir/nir_lower_system_values.c > @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b) > nir_load_base_instance(b)); > break; > > + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: > + case SYSTEM_VALUE_SUBGROUP_GE_MASK: > + case SYSTEM_VALUE_SUBGROUP_GT_MASK: > + case SYSTEM_VALUE_SUBGROUP_LE_MASK: > + case SYSTEM_VALUE_SUBGROUP_LT_MASK: { > + nir_ssa_def *count = nir_load_subgroup_invocation(b); > + > + switch (var->data.location) { > + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: > +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count); > +break; > + case SYSTEM_VALUE_SUBGROUP_GE_MASK: > +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count); > +break; > + case SYSTEM_VALUE_SUBGROUP_GT_MASK: > +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count); > +break; > + case SYSTEM_VALUE_SUBGROUP_LE_MASK: > +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), > count)); > +break; > + case SYSTEM_VALUE_SUBGROUP_LT_MASK: > +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), > count)); > +break; > + default: > +unreachable("you seriously can't tell this is unreachable?"); > + } > + } > + While this fine to do for both Intel and AMD, Nvidia actually has special system values for these, and AMD has special instructions for bitCount(foo & gl_SubGroupLtMask), so I think we should have actual nir_load_subgroup_*_mask intrinsics for these. Also, that way you can use the same shrinking logic to turn these into 32-bit shifts on Intel. >default: > break; >} > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index 264398f38e..17f35e081d 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -4075,7 +4075,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , > nir_intrinsic_instr *instr >break; > } > > - case nir_intrinsic_load_channel_num: { > + case
[Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot
We already had a channel_num system value, which I'm renaming to subgroup_invocation to match the rest of the new system values. Note that while ballotARB(true) will return zeros in the high 32-bits on systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB variables do not consider whether channels are enabled. See issue (1) of ARB_shader_ballot. --- src/compiler/nir/nir.c | 4 src/compiler/nir/nir_intrinsics.h | 8 +++- src/compiler/nir/nir_lower_system_values.c | 28 src/intel/compiler/brw_fs_nir.cpp | 2 +- src/intel/compiler/brw_nir_intrinsics.c| 4 ++-- 5 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 491b908396..9827e129ca 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_helper_invocation; case SYSTEM_VALUE_VIEW_INDEX: return nir_intrinsic_load_view_index; + case SYSTEM_VALUE_SUBGROUP_SIZE: + return nir_intrinsic_load_subgroup_size; + case SYSTEM_VALUE_SUBGROUP_INVOCATION: + return nir_intrinsic_load_subgroup_invocation; default: unreachable("system value does not directly correspond to intrinsic"); } diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 6c6ba4cf59..96ecfbc338 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx) SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx) /* Blend constant color values. Float values are clamped. */ SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index 810100a081..faf0c3c9da 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b) nir_load_base_instance(b)); break; + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: + case SYSTEM_VALUE_SUBGROUP_GE_MASK: + case SYSTEM_VALUE_SUBGROUP_GT_MASK: + case SYSTEM_VALUE_SUBGROUP_LE_MASK: + case SYSTEM_VALUE_SUBGROUP_LT_MASK: { + nir_ssa_def *count = nir_load_subgroup_invocation(b); + + switch (var->data.location) { + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: +sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count); +break; + case SYSTEM_VALUE_SUBGROUP_GE_MASK: +sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count); +break; + case SYSTEM_VALUE_SUBGROUP_GT_MASK: +sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count); +break; + case SYSTEM_VALUE_SUBGROUP_LE_MASK: +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count)); +break; + case SYSTEM_VALUE_SUBGROUP_LT_MASK: +sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count)); +break; + default: +unreachable("you seriously can't tell this is unreachable?"); + } + } + default: break; } diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 264398f38e..17f35e081d 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4075,7 +4075,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_channel_num: { + case nir_intrinsic_load_subgroup_invocation: { fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW); dest = retype(dest, BRW_REGISTER_TYPE_UD); const fs_builder allbld8 = bld.group(8, 0).exec_all(); diff --git a/src/intel/compiler/brw_nir_intrinsics.c b/src/intel/compiler/brw_nir_intrinsics.c index d63570fa2a..abbbc6f93e 100644 --- a/src/intel/compiler/brw_nir_intrinsics.c +++ b/src/intel/compiler/brw_nir_intrinsics.c @@ -88,10 +88,10 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state, /* We construct the local invocation index from: * *gl_LocalInvocationIndex = -