subject:"\[Mesa\-dev\] \[PATCH v4 22\/44\] i965\/fs\: Helpers for un\/shuffle 16\-bit pairs in 32\-bit components"

Re: [Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

2017-12-01 Thread Jason Ekstrand

On Thu, Nov 30, 2017 at 3:41 PM, Chema Casanova 
wrote:

>
>
> On 30/11/17 23:21, Jason Ekstrand wrote:
> > On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo
> > > wrote:
> >
> > This helpers are used to load/store 16-bit types from/to 32-bit
> > components.
> >
> > The functions shuffle_32bit_load_result_to_16bit_data and
> > shuffle_16bit_data_for_32bit_write are implemented in a similar
> > way than the analogous functions for handling 64-bit types.
> > ---
> >  src/intel/compiler/brw_fs.h   | 11 +
> >  src/intel/compiler/brw_fs_nir.cpp | 51
> > +++
> >  2 files changed, 62 insertions(+)
> >
> > diff --git a/src/intel/compiler/brw_fs.h
> b/src/intel/compiler/brw_fs.h
> > index 19b897e7a9..30557324d5 100644
> > --- a/src/intel/compiler/brw_fs.h
> > +++ b/src/intel/compiler/brw_fs.h
> > @@ -497,6 +497,17 @@ void
> > shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder ,
> >  fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder
> ,
> >const fs_reg ,
> >uint32_t components);
> > +
> > +void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder
> > ,
> > + const fs_reg ,
> > + const fs_reg ,
> > + uint32_t components);
> > +
> > +void shuffle_16bit_data_for_32bit_write(const brw::fs_builder ,
> > +const fs_reg ,
> > +const fs_reg ,
> > +uint32_t components);
> > +
> >  fs_reg setup_imm_df(const brw::fs_builder ,
> >  double v);
> >
> > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> > b/src/intel/compiler/brw_fs_nir.cpp
> > index 726b2fcee7..c091241132 100644
> > --- a/src/intel/compiler/brw_fs_nir.cpp
> > +++ b/src/intel/compiler/brw_fs_nir.cpp
> > @@ -4828,6 +4828,33 @@ shuffle_32bit_load_result_to_64bit_data(const
> > fs_builder ,
> > }
> >  }
> >
> > +void
> > +shuffle_32bit_load_result_to_16bit_data(const fs_builder ,
> > +const fs_reg ,
> > +const fs_reg ,
> > +uint32_t components)
> > +{
> > +   assert(type_sz(src.type) == 4);
> > +   assert(type_sz(dst.type) == 2);
> > +
> > +   fs_reg tmp = retype(bld.vgrf(src.type), dst.type);
> > +
> > +   for (unsigned i = 0; i < components; i++) {
> > +  const fs_reg component_i = subscript(offset(src, bld, i / 2),
> > dst.type, i % 2);
> > +
> > +  bld.MOV(offset(tmp, bld, i % 2), component_i);
> > +
> > +  if (i % 2) {
> > + bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0));
> > + bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1));
> > +  }
> >
> >
> > I'm very confused by this extra moving.  Why can't we just do
> >
> > bld.MOV(offset(dst, bld, i), component_i);
> >
> > above?  Maybe I'm missing something but I don't see why the extra moves
> > are needed.
>
>
> There is a comment in the previous function
> shuffle_32bit_load_result_to_64bit_data that explains the similar
> situation that still applies in shuffle_32bit_load_result_to_16bit_data,
> what about including the following comment.
>
> /* A temporary that we will use to shuffle the 16-bit data of each
>  * component in the vector into valid 32-bit data. We can't write
>  * directly to dst because dst can be the same as src and in that
>  * case the first MOV in the loop below would overwrite the data
>  * read in the second MOV.
>  */
>
> But in any case I've just checked, and at first sight at the 6 final
> uses of this function this situation never happens.
>
> >
> >
> > +   }
> > +   if (components % 2) {
> > +  bld.MOV(offset(dst, bld, components - 1), tmp);
> > +   }
> > +}
> > +
> > +
> >  /**
> >   * This helper does the inverse operation of
> >   * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA.
> > @@ -4860,6 +4887,30 @@ shuffle_64bit_data_for_32bit_write(const
> > fs_builder ,
> > return dst;
> >  }
> >
> > +void
> > +shuffle_16bit_data_for_32bit_write(const fs_builder ,
> > +   const fs_reg ,
> > +   const fs_reg ,
> > +   uint32_t components)
> > +{
> > +   assert(type_sz(src.type) == 2);
> > +   assert(type_sz(dst.type) == 4);
> > +
> > +   fs_reg tmp = bld.vgrf(dst.type);
> > +

Re: [Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

2017-11-30 Thread Chema Casanova



On 30/11/17 23:21, Jason Ekstrand wrote:
> On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo
> > wrote:
> 
> This helpers are used to load/store 16-bit types from/to 32-bit
> components.
> 
> The functions shuffle_32bit_load_result_to_16bit_data and
> shuffle_16bit_data_for_32bit_write are implemented in a similar
> way than the analogous functions for handling 64-bit types.
> ---
>  src/intel/compiler/brw_fs.h       | 11 +
>  src/intel/compiler/brw_fs_nir.cpp | 51
> +++
>  2 files changed, 62 insertions(+)
> 
> diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
> index 19b897e7a9..30557324d5 100644
> --- a/src/intel/compiler/brw_fs.h
> +++ b/src/intel/compiler/brw_fs.h
> @@ -497,6 +497,17 @@ void
> shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder ,
>  fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder ,
>                                            const fs_reg ,
>                                            uint32_t components);
> +
> +void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder
> ,
> +                                             const fs_reg ,
> +                                             const fs_reg ,
> +                                             uint32_t components);
> +
> +void shuffle_16bit_data_for_32bit_write(const brw::fs_builder ,
> +                                        const fs_reg ,
> +                                        const fs_reg ,
> +                                        uint32_t components);
> +
>  fs_reg setup_imm_df(const brw::fs_builder ,
>                      double v);
> 
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 726b2fcee7..c091241132 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -4828,6 +4828,33 @@ shuffle_32bit_load_result_to_64bit_data(const
> fs_builder ,
>     }
>  }
> 
> +void
> +shuffle_32bit_load_result_to_16bit_data(const fs_builder ,
> +                                        const fs_reg ,
> +                                        const fs_reg ,
> +                                        uint32_t components)
> +{
> +   assert(type_sz(src.type) == 4);
> +   assert(type_sz(dst.type) == 2);
> +
> +   fs_reg tmp = retype(bld.vgrf(src.type), dst.type);
> +
> +   for (unsigned i = 0; i < components; i++) {
> +      const fs_reg component_i = subscript(offset(src, bld, i / 2),
> dst.type, i % 2);
> +
> +      bld.MOV(offset(tmp, bld, i % 2), component_i);
> +
> +      if (i % 2) {
> +         bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0));
> +         bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1));
> +      }
> 
> 
> I'm very confused by this extra moving.  Why can't we just do
> 
> bld.MOV(offset(dst, bld, i), component_i);
> 
> above?  Maybe I'm missing something but I don't see why the extra moves
> are needed.


There is a comment in the previous function
shuffle_32bit_load_result_to_64bit_data that explains the similar
situation that still applies in shuffle_32bit_load_result_to_16bit_data,
what about including the following comment.

/* A temporary that we will use to shuffle the 16-bit data of each
 * component in the vector into valid 32-bit data. We can't write
 * directly to dst because dst can be the same as src and in that
 * case the first MOV in the loop below would overwrite the data
 * read in the second MOV.
 */

But in any case I've just checked, and at first sight at the 6 final
uses of this function this situation never happens.

>  
> 
> +   }
> +   if (components % 2) {
> +      bld.MOV(offset(dst, bld, components - 1), tmp);
> +   }
> +}
> +
> +
>  /**
>   * This helper does the inverse operation of
>   * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA.
> @@ -4860,6 +4887,30 @@ shuffle_64bit_data_for_32bit_write(const
> fs_builder ,
>     return dst;
>  }
> 
> +void
> +shuffle_16bit_data_for_32bit_write(const fs_builder ,
> +                                   const fs_reg ,
> +                                   const fs_reg ,
> +                                   uint32_t components)
> +{
> +   assert(type_sz(src.type) == 2);
> +   assert(type_sz(dst.type) == 4);
> +
> +   fs_reg tmp = bld.vgrf(dst.type);
> +
> +   for (unsigned i = 0; i < components; i++) {
> +      const fs_reg component_i = offset(src, bld, i);
> +      bld.MOV(subscript(tmp, src.type, i % 2), component_i);
> +      if (i % 2) {
> +         bld.MOV(offset(dst, bld, i / 2), tmp);
> +      }
> 
> 
> Again, why the extra MOVs?

Re: [Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

2017-11-30 Thread Jason Ekstrand

On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo <
jmcasan...@igalia.com> wrote:

> This helpers are used to load/store 16-bit types from/to 32-bit
> components.
>
> The functions shuffle_32bit_load_result_to_16bit_data and
> shuffle_16bit_data_for_32bit_write are implemented in a similar
> way than the analogous functions for handling 64-bit types.
> ---
>  src/intel/compiler/brw_fs.h   | 11 +
>  src/intel/compiler/brw_fs_nir.cpp | 51 ++
> +
>  2 files changed, 62 insertions(+)
>
> diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
> index 19b897e7a9..30557324d5 100644
> --- a/src/intel/compiler/brw_fs.h
> +++ b/src/intel/compiler/brw_fs.h
> @@ -497,6 +497,17 @@ void shuffle_32bit_load_result_to_64bit_data(const
> brw::fs_builder ,
>  fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder ,
>const fs_reg ,
>uint32_t components);
> +
> +void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder ,
> + const fs_reg ,
> + const fs_reg ,
> + uint32_t components);
> +
> +void shuffle_16bit_data_for_32bit_write(const brw::fs_builder ,
> +const fs_reg ,
> +const fs_reg ,
> +uint32_t components);
> +
>  fs_reg setup_imm_df(const brw::fs_builder ,
>  double v);
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 726b2fcee7..c091241132 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -4828,6 +4828,33 @@ shuffle_32bit_load_result_to_64bit_data(const
> fs_builder ,
> }
>  }
>
> +void
> +shuffle_32bit_load_result_to_16bit_data(const fs_builder ,
> +const fs_reg ,
> +const fs_reg ,
> +uint32_t components)
> +{
> +   assert(type_sz(src.type) == 4);
> +   assert(type_sz(dst.type) == 2);
> +
> +   fs_reg tmp = retype(bld.vgrf(src.type), dst.type);
> +
> +   for (unsigned i = 0; i < components; i++) {
> +  const fs_reg component_i = subscript(offset(src, bld, i / 2),
> dst.type, i % 2);
> +
> +  bld.MOV(offset(tmp, bld, i % 2), component_i);
> +
> +  if (i % 2) {
> + bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0));
> + bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1));
> +  }
>

I'm very confused by this extra moving.  Why can't we just do

bld.MOV(offset(dst, bld, i), component_i);

above?  Maybe I'm missing something but I don't see why the extra moves are
needed.


> +   }
> +   if (components % 2) {
> +  bld.MOV(offset(dst, bld, components - 1), tmp);
> +   }
> +}
> +
> +
>  /**
>   * This helper does the inverse operation of
>   * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA.
> @@ -4860,6 +4887,30 @@ shuffle_64bit_data_for_32bit_write(const
> fs_builder ,
> return dst;
>  }
>
> +void
> +shuffle_16bit_data_for_32bit_write(const fs_builder ,
> +   const fs_reg ,
> +   const fs_reg ,
> +   uint32_t components)
> +{
> +   assert(type_sz(src.type) == 2);
> +   assert(type_sz(dst.type) == 4);
> +
> +   fs_reg tmp = bld.vgrf(dst.type);
> +
> +   for (unsigned i = 0; i < components; i++) {
> +  const fs_reg component_i = offset(src, bld, i);
> +  bld.MOV(subscript(tmp, src.type, i % 2), component_i);
> +  if (i % 2) {
> + bld.MOV(offset(dst, bld, i / 2), tmp);
> +  }
>

Again, why the extra MOVs?  Why not

bld.MOV(subscript(offset(tmp, bld, i / 2), src.type, i % 2), component_i);

instead of the extra MOVs?

--Jason


> +   }
> +   if (components % 2) {
> +  bld.MOV(offset(dst, bld, components / 2), tmp);
> +   }
> +}
> +
> +
>  fs_reg
>  setup_imm_df(const fs_builder , double v)
>  {
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

2017-11-29 Thread Jose Maria Casanova Crespo

This helpers are used to load/store 16-bit types from/to 32-bit
components.

The functions shuffle_32bit_load_result_to_16bit_data and
shuffle_16bit_data_for_32bit_write are implemented in a similar
way than the analogous functions for handling 64-bit types.
---
 src/intel/compiler/brw_fs.h   | 11 +
 src/intel/compiler/brw_fs_nir.cpp | 51 +++
 2 files changed, 62 insertions(+)

diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 19b897e7a9..30557324d5 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -497,6 +497,17 @@ void shuffle_32bit_load_result_to_64bit_data(const 
brw::fs_builder ,
 fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder ,
   const fs_reg ,
   uint32_t components);
+
+void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder ,
+ const fs_reg ,
+ const fs_reg ,
+ uint32_t components);
+
+void shuffle_16bit_data_for_32bit_write(const brw::fs_builder ,
+const fs_reg ,
+const fs_reg ,
+uint32_t components);
+
 fs_reg setup_imm_df(const brw::fs_builder ,
 double v);
 
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 726b2fcee7..c091241132 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4828,6 +4828,33 @@ shuffle_32bit_load_result_to_64bit_data(const fs_builder 
,
}
 }
 
+void
+shuffle_32bit_load_result_to_16bit_data(const fs_builder ,
+const fs_reg ,
+const fs_reg ,
+uint32_t components)
+{
+   assert(type_sz(src.type) == 4);
+   assert(type_sz(dst.type) == 2);
+
+   fs_reg tmp = retype(bld.vgrf(src.type), dst.type);
+
+   for (unsigned i = 0; i < components; i++) {
+  const fs_reg component_i = subscript(offset(src, bld, i / 2), dst.type, 
i % 2);
+
+  bld.MOV(offset(tmp, bld, i % 2), component_i);
+
+  if (i % 2) {
+ bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0));
+ bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1));
+  }
+   }
+   if (components % 2) {
+  bld.MOV(offset(dst, bld, components - 1), tmp);
+   }
+}
+
+
 /**
  * This helper does the inverse operation of
  * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA.
@@ -4860,6 +4887,30 @@ shuffle_64bit_data_for_32bit_write(const fs_builder ,
return dst;
 }
 
+void
+shuffle_16bit_data_for_32bit_write(const fs_builder ,
+   const fs_reg ,
+   const fs_reg ,
+   uint32_t components)
+{
+   assert(type_sz(src.type) == 2);
+   assert(type_sz(dst.type) == 4);
+
+   fs_reg tmp = bld.vgrf(dst.type);
+
+   for (unsigned i = 0; i < components; i++) {
+  const fs_reg component_i = offset(src, bld, i);
+  bld.MOV(subscript(tmp, src.type, i % 2), component_i);
+  if (i % 2) {
+ bld.MOV(offset(dst, bld, i / 2), tmp);
+  }
+   }
+   if (components % 2) {
+  bld.MOV(offset(dst, bld, components / 2), tmp);
+   }
+}
+
+
 fs_reg
 setup_imm_df(const fs_builder , double v)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

Re: [Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

Re: [Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

[Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

4 matches

Site Navigation

Mail list logo

Footer information