Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.

2016-07-19 Thread Jason Ekstrand
On Tue, Jul 19, 2016 at 5:02 PM, Kenneth Graunke 
wrote:

> On Tuesday, July 19, 2016 1:39:23 PM PDT Jason Ekstrand wrote:
> > On Mon, Jul 18, 2016 at 1:26 PM, Kenneth Graunke 
> > wrote:
> >
> > > This eliminates the need to walk the list of input variables, recurse
> > > into their types (via logic largely redundant with nir_lower_io), and
> > > interpolate all possible inputs up front.  The backend no longer has
> > > to care about variables at all, which eliminates complications from
> > > trying to pack multiple variables into the same location.  Instead,
> > > each intrinsic specifies exactly what's needed.
> > >
> > > This should unblock Timothy's work on GL_ARB_enhanced_layouts.
> > >
> > > Each load_interpolated_input intrinsic corresponds to PLN instructions,
> > > while load_barycentric_at_* intrinsics correspond to pixel interpolator
> > > messages.  The pixel/centroid/sample barycentric intrinsics simply
> refer
> > > to payload fields (delta_xy[]), and don't actually generate any code.
> > >
> > > Because we use a single intrinsic for both centroid-qualified variables
> > > and interpolateAtCentroid(), they become indistinguishable.  We stop
> > > sending pixel interpolator messages for those, and instead use the
> > > payload provided data, which should be considerably faster.
> > >
> > > On Broadwell:
> > >
> > > total instructions in shared programs: 9067751 -> 9067570 (-0.00%)
> > > instructions in affected programs: 145902 -> 145721 (-0.12%)
> > > helped: 422
> > > HURT: 209
> > >
> > > total spills in shared programs: 2849 -> 2899 (1.76%)
> > > spills in affected programs: 760 -> 810 (6.58%)
> > > helped: 0
> > > HURT: 10
> > >
> > > total fills in shared programs: 3910 -> 3950 (1.02%)
> > > fills in affected programs: 617 -> 657 (6.48%)
> > > helped: 0
> > > HURT: 10
> > >
> > > LOST:   3
> > > GAINED: 3
> > >
> > > The differences mostly appear to be slight changes in MOVs.
> > >
> > > Signed-off-by: Kenneth Graunke 
> > > ---
> > >  src/mesa/drivers/dri/i965/brw_fs.cpp | 175 -
> > >  src/mesa/drivers/dri/i965/brw_fs.h   |   9 +-
> > >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410
> > > ---
> > >  src/mesa/drivers/dri/i965/brw_nir.c  |  16 +-
> > >  4 files changed, 269 insertions(+), 341 deletions(-)
> > >
> > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > > b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > > index 94127bc..06007fe 100644
> > > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > > @@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg
> > > wpos)
> > > bld.MOV(wpos, this->wpos_w);
> > >  }
> > >
> > > -static enum brw_barycentric_mode
> > > -barycentric_mode(enum glsl_interp_mode mode,
> > > - bool is_centroid, bool is_sample)
> > > +enum brw_barycentric_mode
> > > +brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op)
> > >  {
> > > -   unsigned bary;
> > > -
> > > /* Barycentric modes don't make sense for flat inputs. */
> > > assert(mode != INTERP_MODE_FLAT);
> > >
> > > -   if (is_sample) {
> > > -  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
> > > -   } else if (is_centroid) {
> > > -  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
> > > -   } else {
> > > +   unsigned bary;
> > > +   switch (op) {
> > > +   case nir_intrinsic_load_barycentric_pixel:
> > > +   case nir_intrinsic_load_barycentric_at_offset:
> > >bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL;
> > > +  break;
> > > +   case nir_intrinsic_load_barycentric_centroid:
> > > +  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
> > > +  break;
> > > +   case nir_intrinsic_load_barycentric_sample:
> > > +   case nir_intrinsic_load_barycentric_at_sample:
> > > +  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
> > > +  break;
> > > +   default:
> > > +  assert(!"invalid intrinsic");
> > > }
> > >
> > > if (mode == INTERP_MODE_NOPERSPECTIVE)
> > > @@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode
> bary)
> > > return (enum brw_barycentric_mode) ((unsigned) bary - 1);
> > >  }
> > >
> > > -void
> > > -fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
> > > -   const glsl_type *type,
> > > -   glsl_interp_mode
> > > interpolation_mode,
> > > -   int *location, bool
> mod_centroid,
> > > -   bool mod_sample)
> > > -{
> > > -   assert(stage == MESA_SHADER_FRAGMENT);
> > > -   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
> > > -
> > > -   if (type->is_array() || type->is_matrix()) {
> > > -  const glsl_type *elem_type = glsl_get_array_element(type);
> > > -  const unsigned length = glsl_get_length(type);
> > > -
> > > -  for (unsigned i = 

Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.

2016-07-19 Thread Kenneth Graunke
On Tuesday, July 19, 2016 1:39:23 PM PDT Jason Ekstrand wrote:
> On Mon, Jul 18, 2016 at 1:26 PM, Kenneth Graunke 
> wrote:
> 
> > This eliminates the need to walk the list of input variables, recurse
> > into their types (via logic largely redundant with nir_lower_io), and
> > interpolate all possible inputs up front.  The backend no longer has
> > to care about variables at all, which eliminates complications from
> > trying to pack multiple variables into the same location.  Instead,
> > each intrinsic specifies exactly what's needed.
> >
> > This should unblock Timothy's work on GL_ARB_enhanced_layouts.
> >
> > Each load_interpolated_input intrinsic corresponds to PLN instructions,
> > while load_barycentric_at_* intrinsics correspond to pixel interpolator
> > messages.  The pixel/centroid/sample barycentric intrinsics simply refer
> > to payload fields (delta_xy[]), and don't actually generate any code.
> >
> > Because we use a single intrinsic for both centroid-qualified variables
> > and interpolateAtCentroid(), they become indistinguishable.  We stop
> > sending pixel interpolator messages for those, and instead use the
> > payload provided data, which should be considerably faster.
> >
> > On Broadwell:
> >
> > total instructions in shared programs: 9067751 -> 9067570 (-0.00%)
> > instructions in affected programs: 145902 -> 145721 (-0.12%)
> > helped: 422
> > HURT: 209
> >
> > total spills in shared programs: 2849 -> 2899 (1.76%)
> > spills in affected programs: 760 -> 810 (6.58%)
> > helped: 0
> > HURT: 10
> >
> > total fills in shared programs: 3910 -> 3950 (1.02%)
> > fills in affected programs: 617 -> 657 (6.48%)
> > helped: 0
> > HURT: 10
> >
> > LOST:   3
> > GAINED: 3
> >
> > The differences mostly appear to be slight changes in MOVs.
> >
> > Signed-off-by: Kenneth Graunke 
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.cpp | 175 -
> >  src/mesa/drivers/dri/i965/brw_fs.h   |   9 +-
> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410
> > ---
> >  src/mesa/drivers/dri/i965/brw_nir.c  |  16 +-
> >  4 files changed, 269 insertions(+), 341 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index 94127bc..06007fe 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg
> > wpos)
> > bld.MOV(wpos, this->wpos_w);
> >  }
> >
> > -static enum brw_barycentric_mode
> > -barycentric_mode(enum glsl_interp_mode mode,
> > - bool is_centroid, bool is_sample)
> > +enum brw_barycentric_mode
> > +brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op)
> >  {
> > -   unsigned bary;
> > -
> > /* Barycentric modes don't make sense for flat inputs. */
> > assert(mode != INTERP_MODE_FLAT);
> >
> > -   if (is_sample) {
> > -  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
> > -   } else if (is_centroid) {
> > -  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
> > -   } else {
> > +   unsigned bary;
> > +   switch (op) {
> > +   case nir_intrinsic_load_barycentric_pixel:
> > +   case nir_intrinsic_load_barycentric_at_offset:
> >bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL;
> > +  break;
> > +   case nir_intrinsic_load_barycentric_centroid:
> > +  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
> > +  break;
> > +   case nir_intrinsic_load_barycentric_sample:
> > +   case nir_intrinsic_load_barycentric_at_sample:
> > +  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
> > +  break;
> > +   default:
> > +  assert(!"invalid intrinsic");
> > }
> >
> > if (mode == INTERP_MODE_NOPERSPECTIVE)
> > @@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary)
> > return (enum brw_barycentric_mode) ((unsigned) bary - 1);
> >  }
> >
> > -void
> > -fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
> > -   const glsl_type *type,
> > -   glsl_interp_mode
> > interpolation_mode,
> > -   int *location, bool mod_centroid,
> > -   bool mod_sample)
> > -{
> > -   assert(stage == MESA_SHADER_FRAGMENT);
> > -   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
> > -
> > -   if (type->is_array() || type->is_matrix()) {
> > -  const glsl_type *elem_type = glsl_get_array_element(type);
> > -  const unsigned length = glsl_get_length(type);
> > -
> > -  for (unsigned i = 0; i < length; i++) {
> > - emit_general_interpolation(attr, name, elem_type,
> > interpolation_mode,
> > -location, mod_centroid, mod_sample);
> > -  }
> > -   } else if (type->is_record()) {
> > -  for (unsigned i = 0; i < type->length; i++) {
> > - const 

Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.

2016-07-19 Thread Jason Ekstrand
On Mon, Jul 18, 2016 at 1:26 PM, Kenneth Graunke 
wrote:

> This eliminates the need to walk the list of input variables, recurse
> into their types (via logic largely redundant with nir_lower_io), and
> interpolate all possible inputs up front.  The backend no longer has
> to care about variables at all, which eliminates complications from
> trying to pack multiple variables into the same location.  Instead,
> each intrinsic specifies exactly what's needed.
>
> This should unblock Timothy's work on GL_ARB_enhanced_layouts.
>
> Each load_interpolated_input intrinsic corresponds to PLN instructions,
> while load_barycentric_at_* intrinsics correspond to pixel interpolator
> messages.  The pixel/centroid/sample barycentric intrinsics simply refer
> to payload fields (delta_xy[]), and don't actually generate any code.
>
> Because we use a single intrinsic for both centroid-qualified variables
> and interpolateAtCentroid(), they become indistinguishable.  We stop
> sending pixel interpolator messages for those, and instead use the
> payload provided data, which should be considerably faster.
>
> On Broadwell:
>
> total instructions in shared programs: 9067751 -> 9067570 (-0.00%)
> instructions in affected programs: 145902 -> 145721 (-0.12%)
> helped: 422
> HURT: 209
>
> total spills in shared programs: 2849 -> 2899 (1.76%)
> spills in affected programs: 760 -> 810 (6.58%)
> helped: 0
> HURT: 10
>
> total fills in shared programs: 3910 -> 3950 (1.02%)
> fills in affected programs: 617 -> 657 (6.48%)
> helped: 0
> HURT: 10
>
> LOST:   3
> GAINED: 3
>
> The differences mostly appear to be slight changes in MOVs.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 175 -
>  src/mesa/drivers/dri/i965/brw_fs.h   |   9 +-
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410
> ---
>  src/mesa/drivers/dri/i965/brw_nir.c  |  16 +-
>  4 files changed, 269 insertions(+), 341 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 94127bc..06007fe 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg
> wpos)
> bld.MOV(wpos, this->wpos_w);
>  }
>
> -static enum brw_barycentric_mode
> -barycentric_mode(enum glsl_interp_mode mode,
> - bool is_centroid, bool is_sample)
> +enum brw_barycentric_mode
> +brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op)
>  {
> -   unsigned bary;
> -
> /* Barycentric modes don't make sense for flat inputs. */
> assert(mode != INTERP_MODE_FLAT);
>
> -   if (is_sample) {
> -  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
> -   } else if (is_centroid) {
> -  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
> -   } else {
> +   unsigned bary;
> +   switch (op) {
> +   case nir_intrinsic_load_barycentric_pixel:
> +   case nir_intrinsic_load_barycentric_at_offset:
>bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL;
> +  break;
> +   case nir_intrinsic_load_barycentric_centroid:
> +  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
> +  break;
> +   case nir_intrinsic_load_barycentric_sample:
> +   case nir_intrinsic_load_barycentric_at_sample:
> +  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
> +  break;
> +   default:
> +  assert(!"invalid intrinsic");
> }
>
> if (mode == INTERP_MODE_NOPERSPECTIVE)
> @@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary)
> return (enum brw_barycentric_mode) ((unsigned) bary - 1);
>  }
>
> -void
> -fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
> -   const glsl_type *type,
> -   glsl_interp_mode
> interpolation_mode,
> -   int *location, bool mod_centroid,
> -   bool mod_sample)
> -{
> -   assert(stage == MESA_SHADER_FRAGMENT);
> -   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
> -
> -   if (type->is_array() || type->is_matrix()) {
> -  const glsl_type *elem_type = glsl_get_array_element(type);
> -  const unsigned length = glsl_get_length(type);
> -
> -  for (unsigned i = 0; i < length; i++) {
> - emit_general_interpolation(attr, name, elem_type,
> interpolation_mode,
> -location, mod_centroid, mod_sample);
> -  }
> -   } else if (type->is_record()) {
> -  for (unsigned i = 0; i < type->length; i++) {
> - const glsl_type *field_type = type->fields.structure[i].type;
> - emit_general_interpolation(attr, name, field_type,
> interpolation_mode,
> -location, mod_centroid, mod_sample);
> -  }
> -   } else {
> -  assert(type->is_scalar() || type->is_vector());
> -
> -  if 

Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.

2016-07-18 Thread Chris Forbes
On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunke 
wrote:

> +   default:
> +  assert(!"invalid intrinsic");
>

unreachable() ?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.

2016-07-18 Thread Kenneth Graunke
This eliminates the need to walk the list of input variables, recurse
into their types (via logic largely redundant with nir_lower_io), and
interpolate all possible inputs up front.  The backend no longer has
to care about variables at all, which eliminates complications from
trying to pack multiple variables into the same location.  Instead,
each intrinsic specifies exactly what's needed.

This should unblock Timothy's work on GL_ARB_enhanced_layouts.

Each load_interpolated_input intrinsic corresponds to PLN instructions,
while load_barycentric_at_* intrinsics correspond to pixel interpolator
messages.  The pixel/centroid/sample barycentric intrinsics simply refer
to payload fields (delta_xy[]), and don't actually generate any code.

Because we use a single intrinsic for both centroid-qualified variables
and interpolateAtCentroid(), they become indistinguishable.  We stop
sending pixel interpolator messages for those, and instead use the
payload provided data, which should be considerably faster.

On Broadwell:

total instructions in shared programs: 9067751 -> 9067570 (-0.00%)
instructions in affected programs: 145902 -> 145721 (-0.12%)
helped: 422
HURT: 209

total spills in shared programs: 2849 -> 2899 (1.76%)
spills in affected programs: 760 -> 810 (6.58%)
helped: 0
HURT: 10

total fills in shared programs: 3910 -> 3950 (1.02%)
fills in affected programs: 617 -> 657 (6.48%)
helped: 0
HURT: 10

LOST:   3
GAINED: 3

The differences mostly appear to be slight changes in MOVs.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 175 -
 src/mesa/drivers/dri/i965/brw_fs.h   |   9 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410 ---
 src/mesa/drivers/dri/i965/brw_nir.c  |  16 +-
 4 files changed, 269 insertions(+), 341 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 94127bc..06007fe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg wpos)
bld.MOV(wpos, this->wpos_w);
 }
 
-static enum brw_barycentric_mode
-barycentric_mode(enum glsl_interp_mode mode,
- bool is_centroid, bool is_sample)
+enum brw_barycentric_mode
+brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op)
 {
-   unsigned bary;
-
/* Barycentric modes don't make sense for flat inputs. */
assert(mode != INTERP_MODE_FLAT);
 
-   if (is_sample) {
-  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
-   } else if (is_centroid) {
-  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
-   } else {
+   unsigned bary;
+   switch (op) {
+   case nir_intrinsic_load_barycentric_pixel:
+   case nir_intrinsic_load_barycentric_at_offset:
   bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL;
+  break;
+   case nir_intrinsic_load_barycentric_centroid:
+  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
+  break;
+   case nir_intrinsic_load_barycentric_sample:
+   case nir_intrinsic_load_barycentric_at_sample:
+  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
+  break;
+   default:
+  assert(!"invalid intrinsic");
}
 
if (mode == INTERP_MODE_NOPERSPECTIVE)
@@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary)
return (enum brw_barycentric_mode) ((unsigned) bary - 1);
 }
 
-void
-fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
-   const glsl_type *type,
-   glsl_interp_mode interpolation_mode,
-   int *location, bool mod_centroid,
-   bool mod_sample)
-{
-   assert(stage == MESA_SHADER_FRAGMENT);
-   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
-
-   if (type->is_array() || type->is_matrix()) {
-  const glsl_type *elem_type = glsl_get_array_element(type);
-  const unsigned length = glsl_get_length(type);
-
-  for (unsigned i = 0; i < length; i++) {
- emit_general_interpolation(attr, name, elem_type, interpolation_mode,
-location, mod_centroid, mod_sample);
-  }
-   } else if (type->is_record()) {
-  for (unsigned i = 0; i < type->length; i++) {
- const glsl_type *field_type = type->fields.structure[i].type;
- emit_general_interpolation(attr, name, field_type, interpolation_mode,
-location, mod_centroid, mod_sample);
-  }
-   } else {
-  assert(type->is_scalar() || type->is_vector());
-
-  if (prog_data->urb_setup[*location] == -1) {
- /* If there's no incoming setup data for this slot, don't
-  * emit interpolation for it.
-  */
- *attr = offset(*attr, bld, type->vector_elements);
- (*location)++;
- return;
-  }
-
-  attr->type =