Re: [libav-devel] [PATCH 1/6] arm/aarch64: vp9lpf: Calculate !hev directly

2017-02-10 Thread Janne Grunau
On 2017-01-15 22:55:47 +0200, Martin Storsjö wrote:
> Previously we first calculated hev, and then negated it.
> 
> Since we were able to schedule the negation in the middle
> of another calculation, we don't see a gain in all cases.
> 
> Before:                         Cortex A7      A8      A9     A53  A53/AArch64
> vp9_loop_filter_v_4_8_neon:         147.0   129.0   115.8    89.0         88.7
> vp9_loop_filter_v_8_8_neon:         242.0   198.5   174.7   140.0        136.7
> vp9_loop_filter_v_16_8_neon:        500.0   419.5   382.7   293.0        275.7
> vp9_loop_filter_v_16_16_neon:       971.2   825.5   731.5   579.0        453.0
> After:
> vp9_loop_filter_v_4_8_neon:         143.0   127.7   114.8    88.0         87.7
> vp9_loop_filter_v_8_8_neon:         241.0   197.2   173.7   140.0        136.7
> vp9_loop_filter_v_16_8_neon:        497.0   419.5   379.7   293.0        275.7
> vp9_loop_filter_v_16_16_neon:       965.2   818.7   731.4   579.0        452.0
> ---
>  libavcodec/aarch64/vp9lpf_neon.S | 5 ++---
>  libavcodec/arm/vp9lpf_neon.S | 5 ++---
>  2 files changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/libavcodec/aarch64/vp9lpf_neon.S 
> b/libavcodec/aarch64/vp9lpf_neon.S
> index e9c7d9e..3b8e6eb 100644
> --- a/libavcodec/aarch64/vp9lpf_neon.S
> +++ b/libavcodec/aarch64/vp9lpf_neon.S
> @@ -292,7 +292,7 @@
>  .if \mix != 0
>  sxtlv1.8h,  v1.8b
>  .endif
> -cmhiv5\sz,  v5\sz,  v3\sz  // hev
> +cmhsv5\sz,  v3\sz,  v5\sz  // !hev
>  .if \wd == 8
>  // If a 4/8 or 8/4 mix is used, clear the relevant half of v6
>  .if \mix != 0
> @@ -306,11 +306,10 @@
>  .elseif \wd == 8
>  bic v4\sz,  v4\sz,  v6\sz  // fm && !flat8in
>  .endif
> -mvn v5\sz,  v5\sz  // !hev
> +and v5\sz,  v5\sz,  v4\sz  // !hev && fm && !flat8in
>  .if \wd == 16
>  and v7\sz,  v7\sz,  v6\sz  // flat8out && flat8in && fm
>  .endif
> -and v5\sz,  v5\sz,  v4\sz  // !hev && fm && !flat8in
>  
>  mul_sz  \tmp3\().8h,  \tmp4\().8h,  \tmp3\().8h, 
> \tmp4\().8h,  \tmp5\().8h,  \tmp5\().8h, \sz // 3 * (q0 - p0)
>  bic \tmp1\sz,  \tmp1\sz,  v5\sz// if (!hev) 
> av_clip_int8 = 0
> diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S
> index fbf2901..c57c0e9 100644
> --- a/libavcodec/arm/vp9lpf_neon.S
> +++ b/libavcodec/arm/vp9lpf_neon.S
> @@ -141,7 +141,7 @@
>  .if \wd == 8
>  vcle.u8 d6,  d6,  d0@ flat8in
>  .endif
> -vcgt.u8 d5,  d5,  d3@ hev
> +vcle.u8 d5,  d5,  d3@ !hev
>  .if \wd == 8
>  vandd6,  d6,  d4@ flat8in && fm
>  .endif
> @@ -151,11 +151,10 @@
>  .elseif \wd == 8
>  vbicd4,  d4,  d6@ fm && !flat8in
>  .endif
> -vmvnd5,  d5 @ !hev
> +vandd5,  d5,  d4@ !hev && fm && !flat8in
>  .if \wd == 16
>  vandd7,  d7,  d6@ flat8out && flat8in && fm
>  .endif
> -vandd5,  d5,  d4@ !hev && fm && !flat8in
>  
>  vmul.s16\tmpq2,  \tmpq2, \tmpq3 @ 3 * (q0 - p0)
>  vbic\tmp1,   \tmp1,   d5@ if (!hev) av_clip_int8 = 0

ok

Janne
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/6] arm/aarch64: vp9lpf: Calculate !hev directly

2017-01-15 Thread Martin Storsjö
Previously we first calculated hev, and then negated it.

Since we were able to schedule the negation in the middle
of another calculation, we don't see a gain in all cases.

Before:                         Cortex A7      A8      A9     A53  A53/AArch64
vp9_loop_filter_v_4_8_neon:         147.0   129.0   115.8    89.0         88.7
vp9_loop_filter_v_8_8_neon:         242.0   198.5   174.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:        500.0   419.5   382.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:       971.2   825.5   731.5   579.0        453.0
After:
vp9_loop_filter_v_4_8_neon:         143.0   127.7   114.8    88.0         87.7
vp9_loop_filter_v_8_8_neon:         241.0   197.2   173.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:        497.0   419.5   379.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:       965.2   818.7   731.4   579.0        452.0
---
 libavcodec/aarch64/vp9lpf_neon.S | 5 ++---
 libavcodec/arm/vp9lpf_neon.S | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index e9c7d9e..3b8e6eb 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -292,7 +292,7 @@
 .if \mix != 0
 sxtlv1.8h,  v1.8b
 .endif
-cmhiv5\sz,  v5\sz,  v3\sz  // hev
+cmhsv5\sz,  v3\sz,  v5\sz  // !hev
 .if \wd == 8
 // If a 4/8 or 8/4 mix is used, clear the relevant half of v6
 .if \mix != 0
@@ -306,11 +306,10 @@
 .elseif \wd == 8
 bic v4\sz,  v4\sz,  v6\sz  // fm && !flat8in
 .endif
-mvn v5\sz,  v5\sz  // !hev
+and v5\sz,  v5\sz,  v4\sz  // !hev && fm && !flat8in
 .if \wd == 16
 and v7\sz,  v7\sz,  v6\sz  // flat8out && flat8in && fm
 .endif
-and v5\sz,  v5\sz,  v4\sz  // !hev && fm && !flat8in
 
 mul_sz  \tmp3\().8h,  \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  
\tmp5\().8h,  \tmp5\().8h, \sz // 3 * (q0 - p0)
 bic \tmp1\sz,  \tmp1\sz,  v5\sz// if (!hev) 
av_clip_int8 = 0
diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S
index fbf2901..c57c0e9 100644
--- a/libavcodec/arm/vp9lpf_neon.S
+++ b/libavcodec/arm/vp9lpf_neon.S
@@ -141,7 +141,7 @@
 .if \wd == 8
 vcle.u8 d6,  d6,  d0@ flat8in
 .endif
-vcgt.u8 d5,  d5,  d3@ hev
+vcle.u8 d5,  d5,  d3@ !hev
 .if \wd == 8
 vandd6,  d6,  d4@ flat8in && fm
 .endif
@@ -151,11 +151,10 @@
 .elseif \wd == 8
 vbicd4,  d4,  d6@ fm && !flat8in
 .endif
-vmvnd5,  d5 @ !hev
+vandd5,  d5,  d4@ !hev && fm && !flat8in
 .if \wd == 16
 vandd7,  d7,  d6@ flat8out && flat8in && fm
 .endif
-vandd5,  d5,  d4@ !hev && fm && !flat8in
 
 vmul.s16\tmpq2,  \tmpq2, \tmpq3 @ 3 * (q0 - p0)
 vbic\tmp1,   \tmp1,   d5@ if (!hev) av_clip_int8 = 0
-- 
2.7.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel