Re: [Mesa-dev] [PATCH 3/3] i965/fs/nir: Don’t let nir lower nir_intrinsic_load_subgroup_all_mask
On Tue, Oct 31, 2017 at 10:55 AM, Neil Robertswrote: > Instead of letting nir lower nir_intrinsic_load_subgroup_all_mask this > is now generated directly. This is more efficient because it can be > calculated in the compiler based on the dispatch width. > > Sadly it’s still not totally ideal because the constant doesn’t seem > to get propagated and there is still a redundant MOV. > --- > src/intel/compiler/brw_compiler.c | 2 +- > src/intel/compiler/brw_fs_nir.cpp | 7 ++- > 2 files changed, 7 insertions(+), 2 deletions(-) > > diff --git a/src/intel/compiler/brw_compiler.c > b/src/intel/compiler/brw_compiler.c > index 8df0d2e..f02fceb 100644 > --- a/src/intel/compiler/brw_compiler.c > +++ b/src/intel/compiler/brw_compiler.c > @@ -57,7 +57,7 @@ static const struct nir_shader_compiler_options > scalar_nir_options = { > .lower_unpack_snorm_4x8 = true, > .lower_unpack_unorm_2x16 = true, > .lower_unpack_unorm_4x8 = true, > - .lower_subgroup_all_mask = true, > + .lower_subgroup_all_mask = false, > .lower_subgroup_masks = true, > .max_subgroup_size = 32, > .max_unroll_iterations = 32, > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index 9202b0f..b73edc9 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -4185,7 +4185,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , > nir_intrinsic_instr *instr >break; > } > > - case nir_intrinsic_load_subgroup_all_mask: > + case nir_intrinsic_load_subgroup_all_mask: { > + uint32_t mask = ~UINT32_C(0) >> (32 - dispatch_width); > + bld.MOV(retype(dest, BRW_REGISTER_TYPE_Q), brw_imm_d(mask)); I think this instruction is unfortunately illegal on CHV and BXT due to a pretty aggravating restriction. See the comment after case nir_op_u2f64 in brw_fs_nir.cpp. If you set INTEL_DEVID_OVERRIDE=bxt INTEL_DEBUG=fs,cs it should trigger an assertion and print a corresponding annotation after the instruction in question telling you what went wrong. The solution should be the same as in nir_op_u2f64. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] i965/fs/nir: Don’t let nir lower nir_intrinsic_load_subgroup_all_mask
On Tue, Oct 31, 2017 at 11:29 AM, Jason Ekstrandwrote: > On Tue, Oct 31, 2017 at 10:55 AM, Neil Roberts > wrote: > >> Instead of letting nir lower nir_intrinsic_load_subgroup_all_mask this >> is now generated directly. This is more efficient because it can be >> calculated in the compiler based on the dispatch width. >> >> Sadly it’s still not totally ideal because the constant doesn’t seem >> to get propagated and there is still a redundant MOV. >> > One of the patches in my subgroups series switches us over to using a constant exec width of 32 that is provided to the NIR lowering pass so this will become a non-issue. --- >> src/intel/compiler/brw_compiler.c | 2 +- >> src/intel/compiler/brw_fs_nir.cpp | 7 ++- >> 2 files changed, 7 insertions(+), 2 deletions(-) >> >> diff --git a/src/intel/compiler/brw_compiler.c >> b/src/intel/compiler/brw_compiler.c >> index 8df0d2e..f02fceb 100644 >> --- a/src/intel/compiler/brw_compiler.c >> +++ b/src/intel/compiler/brw_compiler.c >> @@ -57,7 +57,7 @@ static const struct nir_shader_compiler_options >> scalar_nir_options = { >> .lower_unpack_snorm_4x8 = true, >> .lower_unpack_unorm_2x16 = true, >> .lower_unpack_unorm_4x8 = true, >> - .lower_subgroup_all_mask = true, >> + .lower_subgroup_all_mask = false, >> .lower_subgroup_masks = true, >> .max_subgroup_size = 32, >> .max_unroll_iterations = 32, >> diff --git a/src/intel/compiler/brw_fs_nir.cpp >> b/src/intel/compiler/brw_fs_nir.cpp >> index 9202b0f..b73edc9 100644 >> --- a/src/intel/compiler/brw_fs_nir.cpp >> +++ b/src/intel/compiler/brw_fs_nir.cpp >> @@ -4185,7 +4185,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder >> , nir_intrinsic_instr *instr >>break; >> } >> >> - case nir_intrinsic_load_subgroup_all_mask: >> + case nir_intrinsic_load_subgroup_all_mask: { >> + uint32_t mask = ~UINT32_C(0) >> (32 - dispatch_width); >> + bld.MOV(retype(dest, BRW_REGISTER_TYPE_Q), brw_imm_d(mask)); >> > > In SIMD32, you're going to get unintentional sign-extension here. I think > you want UQ and ud. > > --Jason > > >> + break; >> + } >> + >> case nir_intrinsic_load_subgroup_eq_mask: >> case nir_intrinsic_load_subgroup_ge_mask: >> case nir_intrinsic_load_subgroup_gt_mask: >> -- >> 2.9.5 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] i965/fs/nir: Don’t let nir lower nir_intrinsic_load_subgroup_all_mask
On Tue, Oct 31, 2017 at 10:55 AM, Neil Robertswrote: > Instead of letting nir lower nir_intrinsic_load_subgroup_all_mask this > is now generated directly. This is more efficient because it can be > calculated in the compiler based on the dispatch width. > > Sadly it’s still not totally ideal because the constant doesn’t seem > to get propagated and there is still a redundant MOV. > --- > src/intel/compiler/brw_compiler.c | 2 +- > src/intel/compiler/brw_fs_nir.cpp | 7 ++- > 2 files changed, 7 insertions(+), 2 deletions(-) > > diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_ > compiler.c > index 8df0d2e..f02fceb 100644 > --- a/src/intel/compiler/brw_compiler.c > +++ b/src/intel/compiler/brw_compiler.c > @@ -57,7 +57,7 @@ static const struct nir_shader_compiler_options > scalar_nir_options = { > .lower_unpack_snorm_4x8 = true, > .lower_unpack_unorm_2x16 = true, > .lower_unpack_unorm_4x8 = true, > - .lower_subgroup_all_mask = true, > + .lower_subgroup_all_mask = false, > .lower_subgroup_masks = true, > .max_subgroup_size = 32, > .max_unroll_iterations = 32, > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index 9202b0f..b73edc9 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -4185,7 +4185,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder > , nir_intrinsic_instr *instr >break; > } > > - case nir_intrinsic_load_subgroup_all_mask: > + case nir_intrinsic_load_subgroup_all_mask: { > + uint32_t mask = ~UINT32_C(0) >> (32 - dispatch_width); > + bld.MOV(retype(dest, BRW_REGISTER_TYPE_Q), brw_imm_d(mask)); > In SIMD32, you're going to get unintentional sign-extension here. I think you want UQ and ud. --Jason > + break; > + } > + > case nir_intrinsic_load_subgroup_eq_mask: > case nir_intrinsic_load_subgroup_ge_mask: > case nir_intrinsic_load_subgroup_gt_mask: > -- > 2.9.5 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] i965/fs/nir: Don’t let nir lower nir_intrinsic_load_subgroup_all_mask
Instead of letting nir lower nir_intrinsic_load_subgroup_all_mask this is now generated directly. This is more efficient because it can be calculated in the compiler based on the dispatch width. Sadly it’s still not totally ideal because the constant doesn’t seem to get propagated and there is still a redundant MOV. --- src/intel/compiler/brw_compiler.c | 2 +- src/intel/compiler/brw_fs_nir.cpp | 7 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 8df0d2e..f02fceb 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -57,7 +57,7 @@ static const struct nir_shader_compiler_options scalar_nir_options = { .lower_unpack_snorm_4x8 = true, .lower_unpack_unorm_2x16 = true, .lower_unpack_unorm_4x8 = true, - .lower_subgroup_all_mask = true, + .lower_subgroup_all_mask = false, .lower_subgroup_masks = true, .max_subgroup_size = 32, .max_unroll_iterations = 32, diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 9202b0f..b73edc9 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4185,7 +4185,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_subgroup_all_mask: + case nir_intrinsic_load_subgroup_all_mask: { + uint32_t mask = ~UINT32_C(0) >> (32 - dispatch_width); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_Q), brw_imm_d(mask)); + break; + } + case nir_intrinsic_load_subgroup_eq_mask: case nir_intrinsic_load_subgroup_ge_mask: case nir_intrinsic_load_subgroup_gt_mask: -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev