Re: [Mesa-dev] [PATCH 31/59] intel/compiler: fix ddx and ddy for 16-bit float

2018-12-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga wrote:

> We were assuming 32-bit elements.
> ---
>  src/intel/compiler/brw_fs_generator.cpp | 34 +
>  1 file changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> index 08dd83dded7..bffd9bc4787 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1259,7 +1259,7 @@ fs_generator::generate_ddx(const fs_inst *inst,
> struct brw_reg src0 = src;
> struct brw_reg src1 = src;
>
> -   src0.subnr   = sizeof(float);
> +   src0.subnr   = type_sz(src.type);
>

Should this be "+="?  I think a plain assignment is broken if we're SIMD8 and
src is in the second half of a register, because it overwrites whatever
sub-register offset src already had.


> src0.vstride = vstride;
> src0.width   = width;
> src0.hstride = BRW_HORIZONTAL_STRIDE_0;
> @@ -1278,23 +1278,25 @@ void
>  fs_generator::generate_ddy(const fs_inst *inst,
> struct brw_reg dst, struct brw_reg src)
>  {
> +   const uint32_t type_size = type_sz(src.type);
> +
> if (inst->opcode == FS_OPCODE_DDY_FINE) {
>/* produce accurate derivatives */
>if (devinfo->gen >= 11) {
>   src = stride(src, 0, 2, 1);
> - struct brw_reg src_0  = byte_offset(src,  0 * sizeof(float));
> - struct brw_reg src_2  = byte_offset(src,  2 * sizeof(float));
> - struct brw_reg src_4  = byte_offset(src,  4 * sizeof(float));
> - struct brw_reg src_6  = byte_offset(src,  6 * sizeof(float));
> - struct brw_reg src_8  = byte_offset(src,  8 * sizeof(float));
> - struct brw_reg src_10 = byte_offset(src, 10 * sizeof(float));
> - struct brw_reg src_12 = byte_offset(src, 12 * sizeof(float));
> - struct brw_reg src_14 = byte_offset(src, 14 * sizeof(float));
> -
> - struct brw_reg dst_0  = byte_offset(dst,  0 * sizeof(float));
> - struct brw_reg dst_4  = byte_offset(dst,  4 * sizeof(float));
> - struct brw_reg dst_8  = byte_offset(dst,  8 * sizeof(float));
> - struct brw_reg dst_12 = byte_offset(dst, 12 * sizeof(float));
> + struct brw_reg src_0  = byte_offset(src,  0 * type_size);
> + struct brw_reg src_2  = byte_offset(src,  2 * type_size);
> + struct brw_reg src_4  = byte_offset(src,  4 * type_size);
> + struct brw_reg src_6  = byte_offset(src,  6 * type_size);
> + struct brw_reg src_8  = byte_offset(src,  8 * type_size);
> + struct brw_reg src_10 = byte_offset(src, 10 * type_size);
> + struct brw_reg src_12 = byte_offset(src, 12 * type_size);
> + struct brw_reg src_14 = byte_offset(src, 14 * type_size);
> +
> + struct brw_reg dst_0  = byte_offset(dst,  0 * type_size);
> + struct brw_reg dst_4  = byte_offset(dst,  4 * type_size);
> + struct brw_reg dst_8  = byte_offset(dst,  8 * type_size);
> + struct brw_reg dst_12 = byte_offset(dst, 12 * type_size);
>
>   brw_push_insn_state(p);
>   brw_set_default_exec_size(p, BRW_EXECUTE_4);
> @@ -1323,8 +1325,8 @@ fs_generator::generate_ddy(const fs_inst *inst,
>/* replicate the derivative at the top-left pixel to other pixels */
>struct brw_reg src0 = stride(src, 4, 4, 0);
>struct brw_reg src1 = stride(src, 4, 4, 0);
> -  src0.subnr = 0 * sizeof(float);
> -  src1.subnr = 2 * sizeof(float);
> +  src0.subnr = 0 * type_size;
> +  src1.subnr = 2 * type_size;
>

Again, should these two subnr assignments be "+="?  Or, better yet, maybe use
byte_offset() instead.


>
>brw_ADD(p, dst, negate(src0), src1);
> }
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 31/59] intel/compiler: fix ddx and ddy for 16-bit float

2018-12-03 Thread Iago Toral Quiroga
We were assuming 32-bit elements.
---
 src/intel/compiler/brw_fs_generator.cpp | 34 +
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index 08dd83dded7..bffd9bc4787 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1259,7 +1259,7 @@ fs_generator::generate_ddx(const fs_inst *inst,
struct brw_reg src0 = src;
struct brw_reg src1 = src;
 
-   src0.subnr   = sizeof(float);
+   src0.subnr   = type_sz(src.type);
src0.vstride = vstride;
src0.width   = width;
src0.hstride = BRW_HORIZONTAL_STRIDE_0;
@@ -1278,23 +1278,25 @@ void
 fs_generator::generate_ddy(const fs_inst *inst,
struct brw_reg dst, struct brw_reg src)
 {
+   const uint32_t type_size = type_sz(src.type);
+
if (inst->opcode == FS_OPCODE_DDY_FINE) {
   /* produce accurate derivatives */
   if (devinfo->gen >= 11) {
  src = stride(src, 0, 2, 1);
- struct brw_reg src_0  = byte_offset(src,  0 * sizeof(float));
- struct brw_reg src_2  = byte_offset(src,  2 * sizeof(float));
- struct brw_reg src_4  = byte_offset(src,  4 * sizeof(float));
- struct brw_reg src_6  = byte_offset(src,  6 * sizeof(float));
- struct brw_reg src_8  = byte_offset(src,  8 * sizeof(float));
- struct brw_reg src_10 = byte_offset(src, 10 * sizeof(float));
- struct brw_reg src_12 = byte_offset(src, 12 * sizeof(float));
- struct brw_reg src_14 = byte_offset(src, 14 * sizeof(float));
-
- struct brw_reg dst_0  = byte_offset(dst,  0 * sizeof(float));
- struct brw_reg dst_4  = byte_offset(dst,  4 * sizeof(float));
- struct brw_reg dst_8  = byte_offset(dst,  8 * sizeof(float));
- struct brw_reg dst_12 = byte_offset(dst, 12 * sizeof(float));
+ struct brw_reg src_0  = byte_offset(src,  0 * type_size);
+ struct brw_reg src_2  = byte_offset(src,  2 * type_size);
+ struct brw_reg src_4  = byte_offset(src,  4 * type_size);
+ struct brw_reg src_6  = byte_offset(src,  6 * type_size);
+ struct brw_reg src_8  = byte_offset(src,  8 * type_size);
+ struct brw_reg src_10 = byte_offset(src, 10 * type_size);
+ struct brw_reg src_12 = byte_offset(src, 12 * type_size);
+ struct brw_reg src_14 = byte_offset(src, 14 * type_size);
+
+ struct brw_reg dst_0  = byte_offset(dst,  0 * type_size);
+ struct brw_reg dst_4  = byte_offset(dst,  4 * type_size);
+ struct brw_reg dst_8  = byte_offset(dst,  8 * type_size);
+ struct brw_reg dst_12 = byte_offset(dst, 12 * type_size);
 
  brw_push_insn_state(p);
  brw_set_default_exec_size(p, BRW_EXECUTE_4);
@@ -1323,8 +1325,8 @@ fs_generator::generate_ddy(const fs_inst *inst,
   /* replicate the derivative at the top-left pixel to other pixels */
   struct brw_reg src0 = stride(src, 4, 4, 0);
   struct brw_reg src1 = stride(src, 4, 4, 0);
-  src0.subnr = 0 * sizeof(float);
-  src1.subnr = 2 * sizeof(float);
+  src0.subnr = 0 * type_size;
+  src1.subnr = 2 * type_size;
 
   brw_ADD(p, dst, negate(src0), src1);
}
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev