Re: [Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true
There's no updated series yet. This patch will work on its own and the issue that was pointed out doesn't affect behavior at all. On 1/7/19 4:47 PM, Lionel Landwerlin wrote: I did not, but then saw that someone pointed out an issue with this particular patch. I can do it tomorrow. Do you have a link to the updated series? Thanks, - Lionel On 07/01/2019 16:54, Jonathan Marek wrote: Hi, Did you get a chance to try this? If not, I might be able to try it myself as I have Intel HW. On 12/19/18 12:34 PM, Lionel Landwerlin wrote: Hey Jonathan, I'm kind of curious as to whether we can have a single expression that pretty much generates the same final code (through some of the algebraic lowering/optimizations). I'll give it a try on Intel HW, see what it does. - Lionel On 19/12/2018 16:39, Jonathan Marek wrote: When ffma is available, we can use a different arrangement of constants to get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7 scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 ffma. 
Signed-off-by: Jonathan Marek --- src/compiler/nir/nir_lower_tex.c | 62 ++-- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 6a6b6c41a7..f7c821bb34 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, nir_ssa_def *a) { - nir_const_value m[3] = { - { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, - { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, - { .f32 = { 1.0f, 2.01723214f, 0.0f, 0.0f } } - }; - - nir_ssa_def *yuv = - nir_vec4(b, - nir_fmul(b, nir_imm_float(b, 1.16438356f), - nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), - nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_imm_float(b, 0.0)); - - nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); - nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); - nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); - - nir_ssa_def *result = nir_vec4(b, red, green, blue, a); + nir_ssa_def *result; + + + if (b->shader->options->fuse_ffma) { + nir_const_value m[4] = { + { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } }, + { .f32 = { 0.0f, -0.39176229f, 2.01723214f, 0.0f } }, + { .f32 = { 1.59602678f,-0.81296764f, 0.0f, 0.0f } }, + }; + static const float y_off = -16.0f * 1.16438356f / 255.0f; + static const float sc = 128.0f / 255.0f; + + nir_ssa_def *offset = + nir_vec4(b, + nir_imm_float(b, y_off - sc * 1.59602678f), + nir_imm_float(b, y_off + sc * (0.81296764f + 0.39176229f)), + nir_imm_float(b, y_off - sc * 2.01723214f), + a); + + result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]), + nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]), + nir_ffma(b, v, nir_build_imm(b, 4, 32, m[2]), offset))); + } else { + 
nir_const_value m[3] = { + { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, + { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, + { .f32 = { 1.0f, 2.01723214f, 0.0f, 0.0f } } + }; + + nir_ssa_def *yuv = + nir_vec4(b, + nir_fmul(b, nir_imm_float(b, 1.16438356f), + nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), + nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_imm_float(b, 0.0)); + + nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); + nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); + nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); + + result = nir_vec4(b, red, green, blue, a); + } nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result)); } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true
I did not but then saw someone pointed out an issue with this particular patch. I can do tomorrow. Do you have link to the updated series? Thanks, - Lionel On 07/01/2019 16:54, Jonathan Marek wrote: Hi, Did you get a chance try this? If not, I might be able to try it myself as I have Intel HW. On 12/19/18 12:34 PM, Lionel Landwerlin wrote: Hey Jonathan, I'm kind of curious as to whether we can have a single expression that pretty much generates the same final code (through some of the algebraic lowering/optimizations). I'll give it a try on Intel HW, see what it does. - Lionel On 19/12/2018 16:39, Jonathan Marek wrote: When ffma is available, we can use a different arrangement of constants to get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7 scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 ffma. Signed-off-by: Jonathan Marek --- src/compiler/nir/nir_lower_tex.c | 62 ++-- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 6a6b6c41a7..f7c821bb34 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, nir_ssa_def *a) { - nir_const_value m[3] = { - { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, - { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, - { .f32 = { 1.0f, 2.01723214f, 0.0f, 0.0f } } - }; - - nir_ssa_def *yuv = - nir_vec4(b, - nir_fmul(b, nir_imm_float(b, 1.16438356f), - nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), - nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_imm_float(b, 0.0)); - - nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); - nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); - nir_ssa_def *blue = nir_fdot4(b, yuv, 
nir_build_imm(b, 4, 32, m[2])); - - nir_ssa_def *result = nir_vec4(b, red, green, blue, a); + nir_ssa_def *result; + + + if (b->shader->options->fuse_ffma) { + nir_const_value m[4] = { + { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } }, + { .f32 = { 0.0f, -0.39176229f, 2.01723214f, 0.0f } }, + { .f32 = { 1.59602678f,-0.81296764f, 0.0f, 0.0f } }, + }; + static const float y_off = -16.0f * 1.16438356f / 255.0f; + static const float sc = 128.0f / 255.0f; + + nir_ssa_def *offset = + nir_vec4(b, + nir_imm_float(b, y_off - sc * 1.59602678f), + nir_imm_float(b, y_off + sc * (0.81296764f + 0.39176229f)), + nir_imm_float(b, y_off - sc * 2.01723214f), + a); + + result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]), + nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]), + nir_ffma(b, v, nir_build_imm(b, 4, 32, m[2]), offset))); + } else { + nir_const_value m[3] = { + { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, + { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, + { .f32 = { 1.0f, 2.01723214f, 0.0f, 0.0f } } + }; + + nir_ssa_def *yuv = + nir_vec4(b, + nir_fmul(b, nir_imm_float(b, 1.16438356f), + nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), + nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_imm_float(b, 0.0)); + + nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); + nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); + nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); + + result = nir_vec4(b, red, green, blue, a); + } nir_ssa_def_rewrite_uses(>dest.ssa, nir_src_for_ssa(result)); } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true
Hi, Did you get a chance try this? If not, I might be able to try it myself as I have Intel HW. On 12/19/18 12:34 PM, Lionel Landwerlin wrote: Hey Jonathan, I'm kind of curious as to whether we can have a single expression that pretty much generates the same final code (through some of the algebraic lowering/optimizations). I'll give it a try on Intel HW, see what it does. - Lionel On 19/12/2018 16:39, Jonathan Marek wrote: When ffma is available, we can use a different arrangement of constants to get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7 scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 ffma. Signed-off-by: Jonathan Marek --- src/compiler/nir/nir_lower_tex.c | 62 ++-- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 6a6b6c41a7..f7c821bb34 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, nir_ssa_def *a) { - nir_const_value m[3] = { - { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, - { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, - { .f32 = { 1.0f, 2.01723214f, 0.0f, 0.0f } } - }; - - nir_ssa_def *yuv = - nir_vec4(b, - nir_fmul(b, nir_imm_float(b, 1.16438356f), - nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), - nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_imm_float(b, 0.0)); - - nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); - nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); - nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); - - nir_ssa_def *result = nir_vec4(b, red, green, blue, a); + nir_ssa_def *result; + + + if (b->shader->options->fuse_ffma) { + nir_const_value m[4] = { + { .f32 = { 
1.16438356f, 1.16438356f, 1.16438356f, 0.0f } }, + { .f32 = { 0.0f, -0.39176229f, 2.01723214f, 0.0f } }, + { .f32 = { 1.59602678f,-0.81296764f, 0.0f, 0.0f } }, + }; + static const float y_off = -16.0f * 1.16438356f / 255.0f; + static const float sc = 128.0f / 255.0f; + + nir_ssa_def *offset = + nir_vec4(b, + nir_imm_float(b, y_off - sc * 1.59602678f), + nir_imm_float(b, y_off + sc * (0.81296764f + 0.39176229f)), + nir_imm_float(b, y_off - sc * 2.01723214f), + a); + + result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]), + nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]), + nir_ffma(b, v, nir_build_imm(b, 4, 32, m[2]), offset))); + } else { + nir_const_value m[3] = { + { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, + { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, + { .f32 = { 1.0f, 2.01723214f, 0.0f, 0.0f } } + }; + + nir_ssa_def *yuv = + nir_vec4(b, + nir_fmul(b, nir_imm_float(b, 1.16438356f), + nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), + nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_imm_float(b, 0.0)); + + nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); + nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); + nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); + + result = nir_vec4(b, red, green, blue, a); + } nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result)); } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true
On 12/20/2018 01:28 AM, Nils Wallménius wrote: Den ons 19 dec. 2018 17:44 skrev Jonathan Marek : When ffma is available, we can use a different arrangement of constants to get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7 scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 ffma. Signed-off-by: Jonathan Marek --- src/compiler/nir/nir_lower_tex.c | 62 ++-- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 6a6b6c41a7..f7c821bb34 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, nir_ssa_def *a) { - nir_const_value m[3] = { - { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, - { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, - { .f32 = { 1.0f, 2.01723214f, 0.0f,0.0f } } - }; - - nir_ssa_def *yuv = - nir_vec4(b, - nir_fmul(b, nir_imm_float(b, 1.16438356f), -nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), - nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_imm_float(b, 0.0)); - - nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); - nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); - nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); - - nir_ssa_def *result = nir_vec4(b, red, green, blue, a); + nir_ssa_def *result; + + + if (b->shader->options->fuse_ffma) { + nir_const_value m[4] = { Drive-by comment, but shouldn't this^ be m[3]? Regards Nils Yes, it should be m[3]. It was originally 4 before alpha was added. 
+ { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } }, + { .f32 = { 0.0f, -0.39176229f, 2.01723214f, 0.0f } }, + { .f32 = { 1.59602678f,-0.81296764f, 0.0f,0.0f } }, + }; + static const float y_off = -16.0f * 1.16438356f / 255.0f; + static const float sc = 128.0f / 255.0f; + + nir_ssa_def *offset = + nir_vec4(b, + nir_imm_float(b, y_off - sc * 1.59602678f), + nir_imm_float(b, y_off + sc * (0.81296764f + 0.39176229f)), + nir_imm_float(b, y_off - sc * 2.01723214f), + a); + + result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]), + nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]), +nir_ffma(b, v, nir_build_imm(b, 4, 32, m[2]), offset))); + } else { + nir_const_value m[3] = { + { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, + { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, + { .f32 = { 1.0f, 2.01723214f, 0.0f,0.0f } } + }; + + nir_ssa_def *yuv = + nir_vec4(b, + nir_fmul(b, nir_imm_float(b, 1.16438356f), + nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), + nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_imm_float(b, 0.0)); + + nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); + nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); + nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); + + result = nir_vec4(b, red, green, blue, a); + } nir_ssa_def_rewrite_uses(>dest.ssa, nir_src_for_ssa(result)); } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true
Den ons 19 dec. 2018 17:44 skrev Jonathan Marek : > When ffma is available, we can use a different arrangement of constants to > get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7 > scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 ffma. > > Signed-off-by: Jonathan Marek > --- > src/compiler/nir/nir_lower_tex.c | 62 ++-- > 1 file changed, 43 insertions(+), 19 deletions(-) > > diff --git a/src/compiler/nir/nir_lower_tex.c > b/src/compiler/nir/nir_lower_tex.c > index 6a6b6c41a7..f7c821bb34 100644 > --- a/src/compiler/nir/nir_lower_tex.c > +++ b/src/compiler/nir/nir_lower_tex.c > @@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr > *tex, > nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, > nir_ssa_def *a) > { > - nir_const_value m[3] = { > - { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, > - { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, > - { .f32 = { 1.0f, 2.01723214f, 0.0f,0.0f } } > - }; > - > - nir_ssa_def *yuv = > - nir_vec4(b, > - nir_fmul(b, nir_imm_float(b, 1.16438356f), > -nir_fadd(b, y, nir_imm_float(b, -16.0f / > 255.0f))), > - nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / > 255.0f)), 0), > - nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / > 255.0f)), 0), > - nir_imm_float(b, 0.0)); > - > - nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); > - nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); > - nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); > - > - nir_ssa_def *result = nir_vec4(b, red, green, blue, a); > + nir_ssa_def *result; > + > + > + if (b->shader->options->fuse_ffma) { > + nir_const_value m[4] = { > Drive-by comment, but shouldn't this^ be m[3]? 
Regards Nils + { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } }, > + { .f32 = { 0.0f, -0.39176229f, 2.01723214f, 0.0f } }, > + { .f32 = { 1.59602678f,-0.81296764f, 0.0f,0.0f } }, > + }; > + static const float y_off = -16.0f * 1.16438356f / 255.0f; > + static const float sc = 128.0f / 255.0f; > + > + nir_ssa_def *offset = > + nir_vec4(b, > + nir_imm_float(b, y_off - sc * 1.59602678f), > + nir_imm_float(b, y_off + sc * (0.81296764f + > 0.39176229f)), > + nir_imm_float(b, y_off - sc * 2.01723214f), > + a); > + > + result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]), > + nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]), > +nir_ffma(b, v, nir_build_imm(b, 4, 32, > m[2]), offset))); > + } else { > + nir_const_value m[3] = { > + { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, > + { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, > + { .f32 = { 1.0f, 2.01723214f, 0.0f,0.0f } } > + }; > + > + nir_ssa_def *yuv = > + nir_vec4(b, > + nir_fmul(b, nir_imm_float(b, 1.16438356f), > + nir_fadd(b, y, nir_imm_float(b, -16.0f / > 255.0f))), > + nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f > / 255.0f)), 0), > + nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f > / 255.0f)), 0), > + nir_imm_float(b, 0.0)); > + > + nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); > + nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, > m[1])); > + nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, > m[2])); > + > + result = nir_vec4(b, red, green, blue, a); > + } > > nir_ssa_def_rewrite_uses(>dest.ssa, nir_src_for_ssa(result)); > } > -- > 2.17.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true
Hey Jonathan, I'm kind of curious as to whether we can have a single expression that pretty much generates the same final code (through some of the algebraic lowering/optimizations). I'll give it a try on Intel HW, see what it does. - Lionel On 19/12/2018 16:39, Jonathan Marek wrote: When ffma is available, we can use a different arrangement of constants to get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7 scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 ffma. Signed-off-by: Jonathan Marek --- src/compiler/nir/nir_lower_tex.c | 62 ++-- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 6a6b6c41a7..f7c821bb34 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, nir_ssa_def *a) { - nir_const_value m[3] = { - { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, - { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, - { .f32 = { 1.0f, 2.01723214f, 0.0f,0.0f } } - }; - - nir_ssa_def *yuv = - nir_vec4(b, - nir_fmul(b, nir_imm_float(b, 1.16438356f), -nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), - nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_imm_float(b, 0.0)); - - nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); - nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); - nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); - - nir_ssa_def *result = nir_vec4(b, red, green, blue, a); + nir_ssa_def *result; + + + if (b->shader->options->fuse_ffma) { + nir_const_value m[4] = { + { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } }, + { .f32 = { 0.0f, -0.39176229f, 2.01723214f, 0.0f } }, + { .f32 = { 1.59602678f,-0.81296764f, 0.0f,0.0f } 
}, + }; + static const float y_off = -16.0f * 1.16438356f / 255.0f; + static const float sc = 128.0f / 255.0f; + + nir_ssa_def *offset = + nir_vec4(b, + nir_imm_float(b, y_off - sc * 1.59602678f), + nir_imm_float(b, y_off + sc * (0.81296764f + 0.39176229f)), + nir_imm_float(b, y_off - sc * 2.01723214f), + a); + + result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]), + nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]), +nir_ffma(b, v, nir_build_imm(b, 4, 32, m[2]), offset))); + } else { + nir_const_value m[3] = { + { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, + { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, + { .f32 = { 1.0f, 2.01723214f, 0.0f,0.0f } } + }; + + nir_ssa_def *yuv = + nir_vec4(b, + nir_fmul(b, nir_imm_float(b, 1.16438356f), + nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), + nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_imm_float(b, 0.0)); + + nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); + nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); + nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); + + result = nir_vec4(b, red, green, blue, a); + } nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result)); } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/16] nir: improve convert_yuv_to_rgb when fuse_ffma=true
When ffma is available, we can use a different arrangement of constants to get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7 scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 ffma. Signed-off-by: Jonathan Marek --- src/compiler/nir/nir_lower_tex.c | 62 ++-- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 6a6b6c41a7..f7c821bb34 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, nir_ssa_def *a) { - nir_const_value m[3] = { - { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, - { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, - { .f32 = { 1.0f, 2.01723214f, 0.0f,0.0f } } - }; - - nir_ssa_def *yuv = - nir_vec4(b, - nir_fmul(b, nir_imm_float(b, 1.16438356f), -nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), - nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_imm_float(b, 0.0)); - - nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); - nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); - nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); - - nir_ssa_def *result = nir_vec4(b, red, green, blue, a); + nir_ssa_def *result; + + + if (b->shader->options->fuse_ffma) { + nir_const_value m[4] = { + { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } }, + { .f32 = { 0.0f, -0.39176229f, 2.01723214f, 0.0f } }, + { .f32 = { 1.59602678f,-0.81296764f, 0.0f,0.0f } }, + }; + static const float y_off = -16.0f * 1.16438356f / 255.0f; + static const float sc = 128.0f / 255.0f; + + nir_ssa_def *offset = + nir_vec4(b, + nir_imm_float(b, y_off - sc * 1.59602678f), + nir_imm_float(b, y_off + sc * (0.81296764f + 0.39176229f)), + nir_imm_float(b, y_off - 
sc * 2.01723214f), + a); + + result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]), + nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]), +nir_ffma(b, v, nir_build_imm(b, 4, 32, m[2]), offset))); + } else { + nir_const_value m[3] = { + { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, + { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, + { .f32 = { 1.0f, 2.01723214f, 0.0f,0.0f } } + }; + + nir_ssa_def *yuv = + nir_vec4(b, + nir_fmul(b, nir_imm_float(b, 1.16438356f), + nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), + nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), + nir_imm_float(b, 0.0)); + + nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); + nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); + nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); + + result = nir_vec4(b, red, green, blue, a); + } nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result)); } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev