Re: [libav-devel] [PATCH 3/4] arm: vp9itxfm: Reorder iadst16 coeffs

2017-02-23 Thread Janne Grunau
On 2017-02-09 14:33:55 +0200, Martin Storsjö wrote:
> This matches the order they are in the 16 bpp version.
> 
> There they are in this order, to make sure we access them in the
> same order they are declared, easing loading only half of the
> coefficients at a time.
> 
> This makes the 8 bpp version match the 16 bpp version better.
> ---
>  libavcodec/arm/vp9itxfm_neon.S | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
> index f74d542..c8eeb76 100644
> --- a/libavcodec/arm/vp9itxfm_neon.S
> +++ b/libavcodec/arm/vp9itxfm_neon.S
> @@ -37,8 +37,8 @@ idct_coeffs:
>  endconst
>  
>  const iadst16_coeffs, align=4
> -.short  16364, 804, 15893, 3981, 14811, 7005, 13160, 9760
> -.short  11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207
> +.short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
> +.short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
>  endconst
>  
>  @ Do four 4x4 transposes, using q registers for the subtransposes that don't
> @@ -672,19 +672,19 @@ function iadst16
>  vld1.16 {q0-q1}, [r12,:128]
>  
>  mbutterfly_l q3,  q2,  d31, d16, d0[1], d0[0] @ q3  = t1,   q2  = t0
> -mbutterfly_l q5,  q4,  d23, d24, d2[1], d2[0] @ q5  = t9,   q4  = t8
> +mbutterfly_l q5,  q4,  d23, d24, d1[1], d1[0] @ q5  = t9,   q4  = t8
>  butterfly_n d31, d24, q3,  q5,  q6,  q5  @ d31 = t1a,  d24 = t9a
>  mbutterfly_l q7,  q6,  d29, d18, d0[3], d0[2] @ q7  = t3,   q6  = t2
>  butterfly_n d16, d23, q2,  q4,  q3,  q4  @ d16 = t0a,  d23 = t8a
>  
> -mbutterfly_l q3,  q2,  d21, d26, d2[3], d2[2] @ q3  = t11,  q2  = t10
> +mbutterfly_l q3,  q2,  d21, d26, d1[3], d1[2] @ q3  = t11,  q2  = t10
>  butterfly_n d29, d26, q7,  q3,  q4,  q3  @ d29 = t3a,  d26 = t11a
> -mbutterfly_l q5,  q4,  d27, d20, d1[1], d1[0] @ q5  = t5,   q4  = t4
> +mbutterfly_l q5,  q4,  d27, d20, d2[1], d2[0] @ q5  = t5,   q4  = t4
>  butterfly_n d18, d21, q6,  q2,  q3,  q2  @ d18 = t2a,  d21 = t10a
>  
>  mbutterfly_l q7,  q6,  d19, d28, d3[1], d3[0] @ q7  = t13,  q6  = t12
>  butterfly_n d20, d28, q5,  q7,  q2,  q7  @ d20 = t5a,  d28 = t13a
> -mbutterfly_l q3,  q2,  d25, d22, d1[3], d1[2] @ q3  = t7,   q2  = t6
> +mbutterfly_l q3,  q2,  d25, d22, d2[3], d2[2] @ q3  = t7,   q2  = t6
>  butterfly_n d27, d19, q4,  q6,  q5,  q6  @ d27 = t4a,  d19 = t12a
>  
>  mbutterfly_l q5,  q4,  d17, d30, d3[3], d3[2] @ q5  = t15,  q4  = t14

ok

Janne
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 3/4] arm: vp9itxfm: Reorder iadst16 coeffs

2017-02-09 Thread Martin Storsjö
This matches the order they are in the 16 bpp version.

There they are in this order, to make sure we access them in the
same order they are declared, easing loading only half of the
coefficients at a time.

This makes the 8 bpp version match the 16 bpp version better.
---
 libavcodec/arm/vp9itxfm_neon.S | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index f74d542..c8eeb76 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -37,8 +37,8 @@ idct_coeffs:
 endconst
 
 const iadst16_coeffs, align=4
-.short  16364, 804, 15893, 3981, 14811, 7005, 13160, 9760
-.short  11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207
+.short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
+.short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
 endconst
 
 @ Do four 4x4 transposes, using q registers for the subtransposes that don't
@@ -672,19 +672,19 @@ function iadst16
 vld1.16 {q0-q1}, [r12,:128]
 
 mbutterfly_l q3,  q2,  d31, d16, d0[1], d0[0] @ q3  = t1,   q2  = t0
-mbutterfly_l q5,  q4,  d23, d24, d2[1], d2[0] @ q5  = t9,   q4  = t8
+mbutterfly_l q5,  q4,  d23, d24, d1[1], d1[0] @ q5  = t9,   q4  = t8
 butterfly_n d31, d24, q3,  q5,  q6,  q5  @ d31 = t1a,  d24 = t9a
 mbutterfly_l q7,  q6,  d29, d18, d0[3], d0[2] @ q7  = t3,   q6  = t2
 butterfly_n d16, d23, q2,  q4,  q3,  q4  @ d16 = t0a,  d23 = t8a
 
-mbutterfly_l q3,  q2,  d21, d26, d2[3], d2[2] @ q3  = t11,  q2  = t10
+mbutterfly_l q3,  q2,  d21, d26, d1[3], d1[2] @ q3  = t11,  q2  = t10
 butterfly_n d29, d26, q7,  q3,  q4,  q3  @ d29 = t3a,  d26 = t11a
-mbutterfly_l q5,  q4,  d27, d20, d1[1], d1[0] @ q5  = t5,   q4  = t4
+mbutterfly_l q5,  q4,  d27, d20, d2[1], d2[0] @ q5  = t5,   q4  = t4
 butterfly_n d18, d21, q6,  q2,  q3,  q2  @ d18 = t2a,  d21 = t10a
 
 mbutterfly_l q7,  q6,  d19, d28, d3[1], d3[0] @ q7  = t13,  q6  = t12
 butterfly_n d20, d28, q5,  q7,  q2,  q7  @ d20 = t5a,  d28 = t13a
-mbutterfly_l q3,  q2,  d25, d22, d1[3], d1[2] @ q3  = t7,   q2  = t6
+mbutterfly_l q3,  q2,  d25, d22, d2[3], d2[2] @ q3  = t7,   q2  = t6
 butterfly_n d27, d19, q4,  q6,  q5,  q6  @ d27 = t4a,  d19 = t12a
 
 mbutterfly_l q5,  q4,  d17, d30, d3[3], d3[2] @ q5  = t15,  q4  = t14
-- 
2.7.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel