Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.
On Tue, Jul 19, 2016 at 5:02 PM, Kenneth Graunkewrote: > On Tuesday, July 19, 2016 1:39:23 PM PDT Jason Ekstrand wrote: > > On Mon, Jul 18, 2016 at 1:26 PM, Kenneth Graunke > > wrote: > > > > > This eliminates the need to walk the list of input variables, recurse > > > into their types (via logic largely redundant with nir_lower_io), and > > > interpolate all possible inputs up front. The backend no longer has > > > to care about variables at all, which eliminates complications from > > > trying to pack multiple variables into the same location. Instead, > > > each intrinsic specifies exactly what's needed. > > > > > > This should unblock Timothy's work on GL_ARB_enhanced_layouts. > > > > > > Each load_interpolated_input intrinsic corresponds to PLN instructions, > > > while load_barycentric_at_* intrinsics correspond to pixel interpolator > > > messages. The pixel/centroid/sample barycentric intrinsics simply > refer > > > to payload fields (delta_xy[]), and don't actually generate any code. > > > > > > Because we use a single intrinsic for both centroid-qualified variables > > > and interpolateAtCentroid(), they become indistinguishable. We stop > > > sending pixel interpolator messages for those, and instead use the > > > payload provided data, which should be considerably faster. > > > > > > On Broadwell: > > > > > > total instructions in shared programs: 9067751 -> 9067570 (-0.00%) > > > instructions in affected programs: 145902 -> 145721 (-0.12%) > > > helped: 422 > > > HURT: 209 > > > > > > total spills in shared programs: 2849 -> 2899 (1.76%) > > > spills in affected programs: 760 -> 810 (6.58%) > > > helped: 0 > > > HURT: 10 > > > > > > total fills in shared programs: 3910 -> 3950 (1.02%) > > > fills in affected programs: 617 -> 657 (6.48%) > > > helped: 0 > > > HURT: 10 > > > > > > LOST: 3 > > > GAINED: 3 > > > > > > The differences mostly appear to be slight changes in MOVs. > > > > > > Signed-off-by: Kenneth Graunke > > > --- > > > src/mesa/drivers/dri/i965/brw_fs.cpp | 175 - > > > src/mesa/drivers/dri/i965/brw_fs.h | 9 +- > > > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410 > > > --- > > > src/mesa/drivers/dri/i965/brw_nir.c | 16 +- > > > 4 files changed, 269 insertions(+), 341 deletions(-) > > > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > > > b/src/mesa/drivers/dri/i965/brw_fs.cpp > > > index 94127bc..06007fe 100644 > > > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > > > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > > > @@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg > > > wpos) > > > bld.MOV(wpos, this->wpos_w); > > > } > > > > > > -static enum brw_barycentric_mode > > > -barycentric_mode(enum glsl_interp_mode mode, > > > - bool is_centroid, bool is_sample) > > > +enum brw_barycentric_mode > > > +brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op) > > > { > > > - unsigned bary; > > > - > > > /* Barycentric modes don't make sense for flat inputs. */ > > > assert(mode != INTERP_MODE_FLAT); > > > > > > - if (is_sample) { > > > - bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; > > > - } else if (is_centroid) { > > > - bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; > > > - } else { > > > + unsigned bary; > > > + switch (op) { > > > + case nir_intrinsic_load_barycentric_pixel: > > > + case nir_intrinsic_load_barycentric_at_offset: > > >bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL; > > > + break; > > > + case nir_intrinsic_load_barycentric_centroid: > > > + bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; > > > + break; > > > + case nir_intrinsic_load_barycentric_sample: > > > + case nir_intrinsic_load_barycentric_at_sample: > > > + bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; > > > + break; > > > + default: > > > + assert(!"invalid intrinsic"); > > > } > > > > > > if (mode == INTERP_MODE_NOPERSPECTIVE) > > > @@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode > bary) > > > return (enum brw_barycentric_mode) ((unsigned) bary - 1); > > > } > > > > > > -void > > > -fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, > > > - const glsl_type *type, > > > - glsl_interp_mode > > > interpolation_mode, > > > - int *location, bool > mod_centroid, > > > - bool mod_sample) > > > -{ > > > - assert(stage == MESA_SHADER_FRAGMENT); > > > - brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; > > > - > > > - if (type->is_array() || type->is_matrix()) { > > > - const glsl_type *elem_type = glsl_get_array_element(type); > > > - const unsigned length = glsl_get_length(type); > > > - > > > - for (unsigned i =
Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.
On Tuesday, July 19, 2016 1:39:23 PM PDT Jason Ekstrand wrote: > On Mon, Jul 18, 2016 at 1:26 PM, Kenneth Graunke> wrote: > > > This eliminates the need to walk the list of input variables, recurse > > into their types (via logic largely redundant with nir_lower_io), and > > interpolate all possible inputs up front. The backend no longer has > > to care about variables at all, which eliminates complications from > > trying to pack multiple variables into the same location. Instead, > > each intrinsic specifies exactly what's needed. > > > > This should unblock Timothy's work on GL_ARB_enhanced_layouts. > > > > Each load_interpolated_input intrinsic corresponds to PLN instructions, > > while load_barycentric_at_* intrinsics correspond to pixel interpolator > > messages. The pixel/centroid/sample barycentric intrinsics simply refer > > to payload fields (delta_xy[]), and don't actually generate any code. > > > > Because we use a single intrinsic for both centroid-qualified variables > > and interpolateAtCentroid(), they become indistinguishable. We stop > > sending pixel interpolator messages for those, and instead use the > > payload provided data, which should be considerably faster. > > > > On Broadwell: > > > > total instructions in shared programs: 9067751 -> 9067570 (-0.00%) > > instructions in affected programs: 145902 -> 145721 (-0.12%) > > helped: 422 > > HURT: 209 > > > > total spills in shared programs: 2849 -> 2899 (1.76%) > > spills in affected programs: 760 -> 810 (6.58%) > > helped: 0 > > HURT: 10 > > > > total fills in shared programs: 3910 -> 3950 (1.02%) > > fills in affected programs: 617 -> 657 (6.48%) > > helped: 0 > > HURT: 10 > > > > LOST: 3 > > GAINED: 3 > > > > The differences mostly appear to be slight changes in MOVs. > > > > Signed-off-by: Kenneth Graunke > > --- > > src/mesa/drivers/dri/i965/brw_fs.cpp | 175 - > > src/mesa/drivers/dri/i965/brw_fs.h | 9 +- > > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410 > > --- > > src/mesa/drivers/dri/i965/brw_nir.c | 16 +- > > 4 files changed, 269 insertions(+), 341 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > > b/src/mesa/drivers/dri/i965/brw_fs.cpp > > index 94127bc..06007fe 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > > @@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg > > wpos) > > bld.MOV(wpos, this->wpos_w); > > } > > > > -static enum brw_barycentric_mode > > -barycentric_mode(enum glsl_interp_mode mode, > > - bool is_centroid, bool is_sample) > > +enum brw_barycentric_mode > > +brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op) > > { > > - unsigned bary; > > - > > /* Barycentric modes don't make sense for flat inputs. */ > > assert(mode != INTERP_MODE_FLAT); > > > > - if (is_sample) { > > - bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; > > - } else if (is_centroid) { > > - bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; > > - } else { > > + unsigned bary; > > + switch (op) { > > + case nir_intrinsic_load_barycentric_pixel: > > + case nir_intrinsic_load_barycentric_at_offset: > >bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL; > > + break; > > + case nir_intrinsic_load_barycentric_centroid: > > + bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; > > + break; > > + case nir_intrinsic_load_barycentric_sample: > > + case nir_intrinsic_load_barycentric_at_sample: > > + bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; > > + break; > > + default: > > + assert(!"invalid intrinsic"); > > } > > > > if (mode == INTERP_MODE_NOPERSPECTIVE) > > @@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary) > > return (enum brw_barycentric_mode) ((unsigned) bary - 1); > > } > > > > -void > > -fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, > > - const glsl_type *type, > > - glsl_interp_mode > > interpolation_mode, > > - int *location, bool mod_centroid, > > - bool mod_sample) > > -{ > > - assert(stage == MESA_SHADER_FRAGMENT); > > - brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; > > - > > - if (type->is_array() || type->is_matrix()) { > > - const glsl_type *elem_type = glsl_get_array_element(type); > > - const unsigned length = glsl_get_length(type); > > - > > - for (unsigned i = 0; i < length; i++) { > > - emit_general_interpolation(attr, name, elem_type, > > interpolation_mode, > > -location, mod_centroid, mod_sample); > > - } > > - } else if (type->is_record()) { > > - for (unsigned i = 0; i < type->length; i++) { > > - const
Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.
On Mon, Jul 18, 2016 at 1:26 PM, Kenneth Graunkewrote: > This eliminates the need to walk the list of input variables, recurse > into their types (via logic largely redundant with nir_lower_io), and > interpolate all possible inputs up front. The backend no longer has > to care about variables at all, which eliminates complications from > trying to pack multiple variables into the same location. Instead, > each intrinsic specifies exactly what's needed. > > This should unblock Timothy's work on GL_ARB_enhanced_layouts. > > Each load_interpolated_input intrinsic corresponds to PLN instructions, > while load_barycentric_at_* intrinsics correspond to pixel interpolator > messages. The pixel/centroid/sample barycentric intrinsics simply refer > to payload fields (delta_xy[]), and don't actually generate any code. > > Because we use a single intrinsic for both centroid-qualified variables > and interpolateAtCentroid(), they become indistinguishable. We stop > sending pixel interpolator messages for those, and instead use the > payload provided data, which should be considerably faster. > > On Broadwell: > > total instructions in shared programs: 9067751 -> 9067570 (-0.00%) > instructions in affected programs: 145902 -> 145721 (-0.12%) > helped: 422 > HURT: 209 > > total spills in shared programs: 2849 -> 2899 (1.76%) > spills in affected programs: 760 -> 810 (6.58%) > helped: 0 > HURT: 10 > > total fills in shared programs: 3910 -> 3950 (1.02%) > fills in affected programs: 617 -> 657 (6.48%) > helped: 0 > HURT: 10 > > LOST: 3 > GAINED: 3 > > The differences mostly appear to be slight changes in MOVs. > > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 175 - > src/mesa/drivers/dri/i965/brw_fs.h | 9 +- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410 > --- > src/mesa/drivers/dri/i965/brw_nir.c | 16 +- > 4 files changed, 269 insertions(+), 341 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 94127bc..06007fe 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg > wpos) > bld.MOV(wpos, this->wpos_w); > } > > -static enum brw_barycentric_mode > -barycentric_mode(enum glsl_interp_mode mode, > - bool is_centroid, bool is_sample) > +enum brw_barycentric_mode > +brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op) > { > - unsigned bary; > - > /* Barycentric modes don't make sense for flat inputs. */ > assert(mode != INTERP_MODE_FLAT); > > - if (is_sample) { > - bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; > - } else if (is_centroid) { > - bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; > - } else { > + unsigned bary; > + switch (op) { > + case nir_intrinsic_load_barycentric_pixel: > + case nir_intrinsic_load_barycentric_at_offset: >bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL; > + break; > + case nir_intrinsic_load_barycentric_centroid: > + bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; > + break; > + case nir_intrinsic_load_barycentric_sample: > + case nir_intrinsic_load_barycentric_at_sample: > + bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; > + break; > + default: > + assert(!"invalid intrinsic"); > } > > if (mode == INTERP_MODE_NOPERSPECTIVE) > @@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary) > return (enum brw_barycentric_mode) ((unsigned) bary - 1); > } > > -void > -fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, > - const glsl_type *type, > - glsl_interp_mode > interpolation_mode, > - int *location, bool mod_centroid, > - bool mod_sample) > -{ > - assert(stage == MESA_SHADER_FRAGMENT); > - brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; > - > - if (type->is_array() || type->is_matrix()) { > - const glsl_type *elem_type = glsl_get_array_element(type); > - const unsigned length = glsl_get_length(type); > - > - for (unsigned i = 0; i < length; i++) { > - emit_general_interpolation(attr, name, elem_type, > interpolation_mode, > -location, mod_centroid, mod_sample); > - } > - } else if (type->is_record()) { > - for (unsigned i = 0; i < type->length; i++) { > - const glsl_type *field_type = type->fields.structure[i].type; > - emit_general_interpolation(attr, name, field_type, > interpolation_mode, > -location, mod_centroid, mod_sample); > - } > - } else { > - assert(type->is_scalar() || type->is_vector()); > - > - if
Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.
On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunkewrote: > + default: > + assert(!"invalid intrinsic"); > unreachable() ? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.
This eliminates the need to walk the list of input variables, recurse into their types (via logic largely redundant with nir_lower_io), and interpolate all possible inputs up front. The backend no longer has to care about variables at all, which eliminates complications from trying to pack multiple variables into the same location. Instead, each intrinsic specifies exactly what's needed. This should unblock Timothy's work on GL_ARB_enhanced_layouts. Each load_interpolated_input intrinsic corresponds to PLN instructions, while load_barycentric_at_* intrinsics correspond to pixel interpolator messages. The pixel/centroid/sample barycentric intrinsics simply refer to payload fields (delta_xy[]), and don't actually generate any code. Because we use a single intrinsic for both centroid-qualified variables and interpolateAtCentroid(), they become indistinguishable. We stop sending pixel interpolator messages for those, and instead use the payload provided data, which should be considerably faster. On Broadwell: total instructions in shared programs: 9067751 -> 9067570 (-0.00%) instructions in affected programs: 145902 -> 145721 (-0.12%) helped: 422 HURT: 209 total spills in shared programs: 2849 -> 2899 (1.76%) spills in affected programs: 760 -> 810 (6.58%) helped: 0 HURT: 10 total fills in shared programs: 3910 -> 3950 (1.02%) fills in affected programs: 617 -> 657 (6.48%) helped: 0 HURT: 10 LOST: 3 GAINED: 3 The differences mostly appear to be slight changes in MOVs. Signed-off-by: Kenneth Graunke--- src/mesa/drivers/dri/i965/brw_fs.cpp | 175 - src/mesa/drivers/dri/i965/brw_fs.h | 9 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410 --- src/mesa/drivers/dri/i965/brw_nir.c | 16 +- 4 files changed, 269 insertions(+), 341 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 94127bc..06007fe 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg wpos) bld.MOV(wpos, this->wpos_w); } -static enum brw_barycentric_mode -barycentric_mode(enum glsl_interp_mode mode, - bool is_centroid, bool is_sample) +enum brw_barycentric_mode +brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op) { - unsigned bary; - /* Barycentric modes don't make sense for flat inputs. */ assert(mode != INTERP_MODE_FLAT); - if (is_sample) { - bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; - } else if (is_centroid) { - bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; - } else { + unsigned bary; + switch (op) { + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_at_offset: bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL; + break; + case nir_intrinsic_load_barycentric_centroid: + bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; + break; + case nir_intrinsic_load_barycentric_sample: + case nir_intrinsic_load_barycentric_at_sample: + bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; + break; + default: + assert(!"invalid intrinsic"); } if (mode == INTERP_MODE_NOPERSPECTIVE) @@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary) return (enum brw_barycentric_mode) ((unsigned) bary - 1); } -void -fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, - const glsl_type *type, - glsl_interp_mode interpolation_mode, - int *location, bool mod_centroid, - bool mod_sample) -{ - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; - - if (type->is_array() || type->is_matrix()) { - const glsl_type *elem_type = glsl_get_array_element(type); - const unsigned length = glsl_get_length(type); - - for (unsigned i = 0; i < length; i++) { - emit_general_interpolation(attr, name, elem_type, interpolation_mode, -location, mod_centroid, mod_sample); - } - } else if (type->is_record()) { - for (unsigned i = 0; i < type->length; i++) { - const glsl_type *field_type = type->fields.structure[i].type; - emit_general_interpolation(attr, name, field_type, interpolation_mode, -location, mod_centroid, mod_sample); - } - } else { - assert(type->is_scalar() || type->is_vector()); - - if (prog_data->urb_setup[*location] == -1) { - /* If there's no incoming setup data for this slot, don't - * emit interpolation for it. - */ - *attr = offset(*attr, bld, type->vector_elements); - (*location)++; - return; - } - - attr->type =