Re: [Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components
On Thu, Nov 30, 2017 at 3:41 PM, Chema Casanovawrote: > > > On 30/11/17 23:21, Jason Ekstrand wrote: > > On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo > > > wrote: > > > > This helpers are used to load/store 16-bit types from/to 32-bit > > components. > > > > The functions shuffle_32bit_load_result_to_16bit_data and > > shuffle_16bit_data_for_32bit_write are implemented in a similar > > way than the analogous functions for handling 64-bit types. > > --- > > src/intel/compiler/brw_fs.h | 11 + > > src/intel/compiler/brw_fs_nir.cpp | 51 > > +++ > > 2 files changed, 62 insertions(+) > > > > diff --git a/src/intel/compiler/brw_fs.h > b/src/intel/compiler/brw_fs.h > > index 19b897e7a9..30557324d5 100644 > > --- a/src/intel/compiler/brw_fs.h > > +++ b/src/intel/compiler/brw_fs.h > > @@ -497,6 +497,17 @@ void > > shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder , > > fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder > , > >const fs_reg , > >uint32_t components); > > + > > +void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder > > , > > + const fs_reg , > > + const fs_reg , > > + uint32_t components); > > + > > +void shuffle_16bit_data_for_32bit_write(const brw::fs_builder , > > +const fs_reg , > > +const fs_reg , > > +uint32_t components); > > + > > fs_reg setup_imm_df(const brw::fs_builder , > > double v); > > > > diff --git a/src/intel/compiler/brw_fs_nir.cpp > > b/src/intel/compiler/brw_fs_nir.cpp > > index 726b2fcee7..c091241132 100644 > > --- a/src/intel/compiler/brw_fs_nir.cpp > > +++ b/src/intel/compiler/brw_fs_nir.cpp > > @@ -4828,6 +4828,33 @@ shuffle_32bit_load_result_to_64bit_data(const > > fs_builder , > > } > > } > > > > +void > > +shuffle_32bit_load_result_to_16bit_data(const fs_builder , > > +const fs_reg , > > +const fs_reg , > > +uint32_t components) > > +{ > > + assert(type_sz(src.type) == 4); > > + assert(type_sz(dst.type) == 2); > > + > > + fs_reg tmp = retype(bld.vgrf(src.type), dst.type); > > + > > + for (unsigned i = 0; i < components; i++) { > > + const fs_reg component_i = subscript(offset(src, bld, i / 2), > > dst.type, i % 2); > > + > > + bld.MOV(offset(tmp, bld, i % 2), component_i); > > + > > + if (i % 2) { > > + bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0)); > > + bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1)); > > + } > > > > > > I'm very confused by this extra moving. Why can't we just do > > > > bld.MOV(offset(dst, bld, i), component_i); > > > > above? Maybe I'm missing something but I don't see why the extra moves > > are needed. > > > There is a comment in the previous function > shuffle_32bit_load_result_to_64bit_data that explains the similar > situation that still applies in shuffle_32bit_load_result_to_16bit_data, > what about including the following comment. > > /* A temporary that we will use to shuffle the 16-bit data of each > * component in the vector into valid 32-bit data. We can't write > * directly to dst because dst can be the same as src and in that > * case the first MOV in the loop below would overwrite the data > * read in the second MOV. > */ > > But in any case I've just checked, and at first sight at the 6 final > uses of this function this situation never happens. > > > > > > > + } > > + if (components % 2) { > > + bld.MOV(offset(dst, bld, components - 1), tmp); > > + } > > +} > > + > > + > > /** > > * This helper does the inverse operation of > > * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA. > > @@ -4860,6 +4887,30 @@ shuffle_64bit_data_for_32bit_write(const > > fs_builder , > > return dst; > > } > > > > +void > > +shuffle_16bit_data_for_32bit_write(const fs_builder , > > + const fs_reg , > > + const fs_reg , > > + uint32_t components) > > +{ > > + assert(type_sz(src.type) == 2); > > + assert(type_sz(dst.type) == 4); > > + > > + fs_reg tmp = bld.vgrf(dst.type); > > +
Re: [Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components
On 30/11/17 23:21, Jason Ekstrand wrote: > On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo >> wrote: > > This helpers are used to load/store 16-bit types from/to 32-bit > components. > > The functions shuffle_32bit_load_result_to_16bit_data and > shuffle_16bit_data_for_32bit_write are implemented in a similar > way than the analogous functions for handling 64-bit types. > --- > src/intel/compiler/brw_fs.h | 11 + > src/intel/compiler/brw_fs_nir.cpp | 51 > +++ > 2 files changed, 62 insertions(+) > > diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h > index 19b897e7a9..30557324d5 100644 > --- a/src/intel/compiler/brw_fs.h > +++ b/src/intel/compiler/brw_fs.h > @@ -497,6 +497,17 @@ void > shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder , > fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder , > const fs_reg , > uint32_t components); > + > +void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder > , > + const fs_reg , > + const fs_reg , > + uint32_t components); > + > +void shuffle_16bit_data_for_32bit_write(const brw::fs_builder , > + const fs_reg , > + const fs_reg , > + uint32_t components); > + > fs_reg setup_imm_df(const brw::fs_builder , > double v); > > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index 726b2fcee7..c091241132 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -4828,6 +4828,33 @@ shuffle_32bit_load_result_to_64bit_data(const > fs_builder , > } > } > > +void > +shuffle_32bit_load_result_to_16bit_data(const fs_builder , > + const fs_reg , > + const fs_reg , > + uint32_t components) > +{ > + assert(type_sz(src.type) == 4); > + assert(type_sz(dst.type) == 2); > + > + fs_reg tmp = retype(bld.vgrf(src.type), dst.type); > + > + for (unsigned i = 0; i < components; i++) { > + const fs_reg component_i = subscript(offset(src, bld, i / 2), > dst.type, i % 2); > + > + bld.MOV(offset(tmp, bld, i % 2), component_i); > + > + if (i % 2) { > + bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0)); > + bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1)); > + } > > > I'm very confused by this extra moving. Why can't we just do > > bld.MOV(offset(dst, bld, i), component_i); > > above? Maybe I'm missing something but I don't see why the extra moves > are needed. There is a comment in the previous function shuffle_32bit_load_result_to_64bit_data that explains the similar situation that still applies in shuffle_32bit_load_result_to_16bit_data, what about including the following comment. /* A temporary that we will use to shuffle the 16-bit data of each * component in the vector into valid 32-bit data. We can't write * directly to dst because dst can be the same as src and in that * case the first MOV in the loop below would overwrite the data * read in the second MOV. */ But in any case I've just checked, and at first sight at the 6 final uses of this function this situation never happens. > > > + } > + if (components % 2) { > + bld.MOV(offset(dst, bld, components - 1), tmp); > + } > +} > + > + > /** > * This helper does the inverse operation of > * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA. > @@ -4860,6 +4887,30 @@ shuffle_64bit_data_for_32bit_write(const > fs_builder , > return dst; > } > > +void > +shuffle_16bit_data_for_32bit_write(const fs_builder , > + const fs_reg , > + const fs_reg , > + uint32_t components) > +{ > + assert(type_sz(src.type) == 2); > + assert(type_sz(dst.type) == 4); > + > + fs_reg tmp = bld.vgrf(dst.type); > + > + for (unsigned i = 0; i < components; i++) { > + const fs_reg component_i = offset(src, bld, i); > + bld.MOV(subscript(tmp, src.type, i % 2), component_i); > + if (i % 2) { > + bld.MOV(offset(dst, bld, i / 2), tmp); > + } > > > Again, why the extra MOVs?
Re: [Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components
On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo < jmcasan...@igalia.com> wrote: > This helpers are used to load/store 16-bit types from/to 32-bit > components. > > The functions shuffle_32bit_load_result_to_16bit_data and > shuffle_16bit_data_for_32bit_write are implemented in a similar > way than the analogous functions for handling 64-bit types. > --- > src/intel/compiler/brw_fs.h | 11 + > src/intel/compiler/brw_fs_nir.cpp | 51 ++ > + > 2 files changed, 62 insertions(+) > > diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h > index 19b897e7a9..30557324d5 100644 > --- a/src/intel/compiler/brw_fs.h > +++ b/src/intel/compiler/brw_fs.h > @@ -497,6 +497,17 @@ void shuffle_32bit_load_result_to_64bit_data(const > brw::fs_builder , > fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder , >const fs_reg , >uint32_t components); > + > +void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder , > + const fs_reg , > + const fs_reg , > + uint32_t components); > + > +void shuffle_16bit_data_for_32bit_write(const brw::fs_builder , > +const fs_reg , > +const fs_reg , > +uint32_t components); > + > fs_reg setup_imm_df(const brw::fs_builder , > double v); > > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index 726b2fcee7..c091241132 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -4828,6 +4828,33 @@ shuffle_32bit_load_result_to_64bit_data(const > fs_builder , > } > } > > +void > +shuffle_32bit_load_result_to_16bit_data(const fs_builder , > +const fs_reg , > +const fs_reg , > +uint32_t components) > +{ > + assert(type_sz(src.type) == 4); > + assert(type_sz(dst.type) == 2); > + > + fs_reg tmp = retype(bld.vgrf(src.type), dst.type); > + > + for (unsigned i = 0; i < components; i++) { > + const fs_reg component_i = subscript(offset(src, bld, i / 2), > dst.type, i % 2); > + > + bld.MOV(offset(tmp, bld, i % 2), component_i); > + > + if (i % 2) { > + bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0)); > + bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1)); > + } > I'm very confused by this extra moving. Why can't we just do bld.MOV(offset(dst, bld, i), component_i); above? Maybe I'm missing something but I don't see why the extra moves are needed. > + } > + if (components % 2) { > + bld.MOV(offset(dst, bld, components - 1), tmp); > + } > +} > + > + > /** > * This helper does the inverse operation of > * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA. > @@ -4860,6 +4887,30 @@ shuffle_64bit_data_for_32bit_write(const > fs_builder , > return dst; > } > > +void > +shuffle_16bit_data_for_32bit_write(const fs_builder , > + const fs_reg , > + const fs_reg , > + uint32_t components) > +{ > + assert(type_sz(src.type) == 2); > + assert(type_sz(dst.type) == 4); > + > + fs_reg tmp = bld.vgrf(dst.type); > + > + for (unsigned i = 0; i < components; i++) { > + const fs_reg component_i = offset(src, bld, i); > + bld.MOV(subscript(tmp, src.type, i % 2), component_i); > + if (i % 2) { > + bld.MOV(offset(dst, bld, i / 2), tmp); > + } > Again, why the extra MOVs? Why not bld.MOV(subscript(offset(tmp, bld, i / 2), src.type, i % 2), component_i); instead of the extra MOVs? --Jason > + } > + if (components % 2) { > + bld.MOV(offset(dst, bld, components / 2), tmp); > + } > +} > + > + > fs_reg > setup_imm_df(const fs_builder , double v) > { > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components
This helpers are used to load/store 16-bit types from/to 32-bit components. The functions shuffle_32bit_load_result_to_16bit_data and shuffle_16bit_data_for_32bit_write are implemented in a similar way than the analogous functions for handling 64-bit types. --- src/intel/compiler/brw_fs.h | 11 + src/intel/compiler/brw_fs_nir.cpp | 51 +++ 2 files changed, 62 insertions(+) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 19b897e7a9..30557324d5 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -497,6 +497,17 @@ void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder , fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder , const fs_reg , uint32_t components); + +void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder , + const fs_reg , + const fs_reg , + uint32_t components); + +void shuffle_16bit_data_for_32bit_write(const brw::fs_builder , +const fs_reg , +const fs_reg , +uint32_t components); + fs_reg setup_imm_df(const brw::fs_builder , double v); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 726b2fcee7..c091241132 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4828,6 +4828,33 @@ shuffle_32bit_load_result_to_64bit_data(const fs_builder , } } +void +shuffle_32bit_load_result_to_16bit_data(const fs_builder , +const fs_reg , +const fs_reg , +uint32_t components) +{ + assert(type_sz(src.type) == 4); + assert(type_sz(dst.type) == 2); + + fs_reg tmp = retype(bld.vgrf(src.type), dst.type); + + for (unsigned i = 0; i < components; i++) { + const fs_reg component_i = subscript(offset(src, bld, i / 2), dst.type, i % 2); + + bld.MOV(offset(tmp, bld, i % 2), component_i); + + if (i % 2) { + bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0)); + bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1)); + } + } + if (components % 2) { + bld.MOV(offset(dst, bld, components - 1), tmp); + } +} + + /** * This helper does the inverse operation of * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA. @@ -4860,6 +4887,30 @@ shuffle_64bit_data_for_32bit_write(const fs_builder , return dst; } +void +shuffle_16bit_data_for_32bit_write(const fs_builder , + const fs_reg , + const fs_reg , + uint32_t components) +{ + assert(type_sz(src.type) == 2); + assert(type_sz(dst.type) == 4); + + fs_reg tmp = bld.vgrf(dst.type); + + for (unsigned i = 0; i < components; i++) { + const fs_reg component_i = offset(src, bld, i); + bld.MOV(subscript(tmp, src.type, i % 2), component_i); + if (i % 2) { + bld.MOV(offset(dst, bld, i / 2), tmp); + } + } + if (components % 2) { + bld.MOV(offset(dst, bld, components / 2), tmp); + } +} + + fs_reg setup_imm_df(const fs_builder , double v) { -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev