On 2016-11-13 00:36:56 +0200, Martin Storsjö wrote:
> ---
> This comes from the review of the aarch64 version.
> ---
> libavcodec/arm/vp9itxfm_neon.S | 45 ++++++++++--------------------------------
> 1 file changed, 10 insertions(+), 35 deletions(-)
>
> diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
> index fca9836..cdb43b5 100644
> --- a/libavcodec/arm/vp9itxfm_neon.S
> +++ b/libavcodec/arm/vp9itxfm_neon.S
> @@ -258,8 +258,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
> .endif
>
> vmov.i16 q15, #0
> -.ifc \txfm1,idct
> -.ifc \txfm2,idct
> +.ifc \txfm1\()_\txfm2,idct_idct
> cmp r3, #1
> bne 1f
> @ DC-only for idct/idct
> @@ -273,7 +272,6 @@ function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
> vmov q3, q2
> b 2f
> .endif
> -.endif
>
> 1:
> vld1.16 {d4-d7}, [r2,:128]
> @@ -386,29 +384,21 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
> @ if only idct is involved.
> @ The iadst also uses a few coefficients from
> @ idct, so those always need to be loaded.
> -.ifc \txfm1,iadst
> - movrel r12, iadst8_coeffs
> - vld1.16 {q1}, [r12,:128]!
> - vpush {q4-q7}
> +.ifc \txfm1\()_\txfm2,idct_idct
> + movrel r12, idct_coeffs
> + vpush {q4-q5}
> vld1.16 {q0}, [r12,:128]
> .else
> -.ifc \txfm2,iadst
> movrel r12, iadst8_coeffs
> vld1.16 {q1}, [r12,:128]!
> vpush {q4-q7}
> vld1.16 {q0}, [r12,:128]
> -.else
> - movrel r12, idct_coeffs
> - vpush {q4-q5}
> - vld1.16 {q0}, [r12,:128]
> -.endif
> .endif
>
> vmov.i16 q2, #0
> vmov.i16 q3, #0
>
> -.ifc \txfm1,idct
> -.ifc \txfm2,idct
> +.ifc \txfm1\()_\txfm2,idct_idct
> cmp r3, #1
> bne 1f
> @ DC-only for idct/idct
> @@ -428,7 +418,6 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
> vst1.16 {d4[0]}, [r2,:16]
> b 2f
> .endif
> -.endif
> 1:
> vld1.16 {q8-q9}, [r2,:128]!
> vld1.16 {q10-q11}, [r2,:128]!
> @@ -497,14 +486,10 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
> vst1.8 {d10}, [r3,:64], r1
> vst1.8 {d11}, [r3,:64], r1
>
> -.ifc \txfm1,iadst
> - vpop {q4-q7}
> +.ifc \txfm1\()_\txfm2,idct_idct
> + vpop {q4-q5}
> .else
> -.ifc \txfm2,iadst
> vpop {q4-q7}
> -.else
> - vpop {q4-q5}
> -.endif
> .endif
> bx lr
> endfunc
> @@ -798,20 +783,14 @@ itxfm16_1d_funcs iadst
>
> .macro itxfm_func16x16 txfm1, txfm2
> function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
> -.ifc \txfm1,idct
> -.ifc \txfm2,idct
> +.ifc \txfm1\()_\txfm2,idct_idct
> cmp r3, #1
> beq idct16x16_dc_add_neon
> .endif
> -.endif
> push {r4-r7,lr}
> -.ifc \txfm1,iadst
> - vpush {q4-q7}
> -.else
> -.ifc \txfm2,iadst
> +.ifnc \txfm1\()_\txfm2,idct_idct
> vpush {q4-q7}
> .endif
> -.endif
> mov r7, sp
>
> @ Align the stack, allocate a temp buffer
> @@ -850,13 +829,9 @@ A sub sp, sp, #512
> .endr
>
> mov sp, r7
> -.ifc \txfm1,iadst
> - vpop {q4-q7}
> -.else
> -.ifc \txfm2,iadst
> +.ifnc \txfm1\()_\txfm2,idct_idct
> vpop {q4-q7}
> .endif
> -.endif
> pop {r4-r7,pc}
> endfunc
> .endm
ok
Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel