On 2017-02-09 14:33:56 +0200, Martin Storsjö wrote:
> This matches the order of the iadst16 coefficients in the 16 bpp version.
>
> There, they are in this order so that we access them in the same
> order in which they are declared, which makes it easier to load only
> half of the coefficients at a time.
>
> This makes the 8 bpp version match the 16 bpp version better.
> ---
> libavcodec/aarch64/vp9itxfm_neon.S | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S
> index f87f6bd..7b7dbd4 100644
> --- a/libavcodec/aarch64/vp9itxfm_neon.S
> +++ b/libavcodec/aarch64/vp9itxfm_neon.S
> @@ -37,8 +37,8 @@ idct_coeffs:
> endconst
>
> const iadst16_coeffs, align=4
> - .short 16364, 804, 15893, 3981, 14811, 7005, 13160, 9760
> - .short 11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207
> + .short 16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
> + .short 14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
> endconst
>
> // out1 = ((in1 + in2) * d0[0] + (1 << 13)) >> 14
> @@ -622,19 +622,19 @@ function iadst16
> ld1 {v0.8h,v1.8h}, [x11]
>
> dmbutterfly_l v6, v7, v4, v5, v31, v16, v0.h[1], v0.h[0] //
> v6,v7 = t1, v4,v5 = t0
> - dmbutterfly_l v10, v11, v8, v9, v23, v24, v1.h[1], v1.h[0] //
> v10,v11 = t9, v8,v9 = t8
> + dmbutterfly_l v10, v11, v8, v9, v23, v24, v0.h[5], v0.h[4] //
> v10,v11 = t9, v8,v9 = t8
> dbutterfly_n v31, v24, v6, v7, v10, v11, v12, v13, v10, v11 //
> v31 = t1a, v24 = t9a
> dmbutterfly_l v14, v15, v12, v13, v29, v18, v0.h[3], v0.h[2] //
> v14,v15 = t3, v12,v13 = t2
> dbutterfly_n v16, v23, v4, v5, v8, v9, v6, v7, v8, v9 //
> v16 = t0a, v23 = t8a
>
> - dmbutterfly_l v6, v7, v4, v5, v21, v26, v1.h[3], v1.h[2] //
> v6,v7 = t11, v4,v5 = t10
> + dmbutterfly_l v6, v7, v4, v5, v21, v26, v0.h[7], v0.h[6] //
> v6,v7 = t11, v4,v5 = t10
> dbutterfly_n v29, v26, v14, v15, v6, v7, v8, v9, v6, v7 //
> v29 = t3a, v26 = t11a
> - dmbutterfly_l v10, v11, v8, v9, v27, v20, v0.h[5], v0.h[4] //
> v10,v11 = t5, v8,v9 = t4
> + dmbutterfly_l v10, v11, v8, v9, v27, v20, v1.h[1], v1.h[0] //
> v10,v11 = t5, v8,v9 = t4
> dbutterfly_n v18, v21, v12, v13, v4, v5, v6, v7, v4, v5 //
> v18 = t2a, v21 = t10a
>
> dmbutterfly_l v14, v15, v12, v13, v19, v28, v1.h[5], v1.h[4] //
> v14,v15 = t13, v12,v13 = t12
> dbutterfly_n v20, v28, v10, v11, v14, v15, v4, v5, v14, v15 //
> v20 = t5a, v28 = t13a
> - dmbutterfly_l v6, v7, v4, v5, v25, v22, v0.h[7], v0.h[6] //
> v6,v7 = t7, v4,v5 = t6
> + dmbutterfly_l v6, v7, v4, v5, v25, v22, v1.h[3], v1.h[2] //
> v6,v7 = t7, v4,v5 = t6
> dbutterfly_n v27, v19, v8, v9, v12, v13, v10, v11, v12, v13 //
> v27 = t4a, v19 = t12a
>
> dmbutterfly_l v10, v11, v8, v9, v17, v30, v1.h[7], v1.h[6] //
> v10,v11 = t15, v8,v9 = t14
ok
Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel