On Mon, Apr 17, 2017 at 6:25 PM, Alexandra Hájková
<[email protected]> wrote:
> ---
>
> Indent operands.
>
> libavcodec/arm/hevc_idct.S | 51 ++++++++++++++++++++++++++++++++-------
> libavcodec/arm/hevcdsp_init_arm.c | 21 +++++++++++-----
> 2 files changed, 57 insertions(+), 15 deletions(-)
>
> diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
> index 7fdd7cc..f949d80 100644
> --- a/libavcodec/arm/hevc_idct.S
> +++ b/libavcodec/arm/hevc_idct.S
> @@ -30,26 +30,37 @@ const trans, align=4
> .short 57, 43, 25, 9
> endconst
>
> -function ff_hevc_idct_4x4_dc_8_neon, export=1
> +.macro idct_4x4_dc bitdepth
> +function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
> ldrsh r1, [r0]
> + .if \bitdepth == 8
> ldr r2, =0x20
> + .else
> + ldr r2, =0x8
> + .endif
> add r1, #1
> asr r1, #1
> add r1, r2
> - asr r1, #6
> + asr r1, #(14 - \bitdepth)
> vdup.16 q0, r1
> vdup.16 q1, r1
> vst1.16 {q0, q1}, [r0]
> bx lr
> endfunc
> +.endm
>
> -function ff_hevc_idct_8x8_dc_8_neon, export=1
> +.macro idct_8x8_dc bitdepth
> +function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1
> ldrsh r1, [r0]
> + .if \bitdepth == 8
> ldr r2, =0x20
> + .else
> + ldr r2, =0x8
> + .endif
> add r1, #1
> asr r1, #1
> add r1, r2
> - asr r1, #6
> + asr r1, #(14 - \bitdepth)
> vdup.16 q8, r1
> vdup.16 q9, r1
> vmov.16 q10, q8
> @@ -61,14 +72,20 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1
> vstm r0, {q8-q15}
> bx lr
> endfunc
> +.endm
>
> -function ff_hevc_idct_16x16_dc_8_neon, export=1
> +.macro idct_16x16_dc bitdepth
> +function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1
> ldrsh r1, [r0]
> + .if \bitdepth == 8
> ldr r2, =0x20
> + .else
> + ldr r2, =0x8
> + .endif
> add r1, #1
> asr r1, #1
> add r1, r2
> - asr r1, #6
> + asr r1, #(14 - \bitdepth)
> vdup.16 q8, r1
> vdup.16 q9, r1
> vmov.16 q10, q8
> @@ -83,14 +100,21 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1
> vstm r0, {q8-q15}
> bx lr
> endfunc
> +.endm
>
> -function ff_hevc_idct_32x32_dc_8_neon, export=1
> +.macro idct_32x32_dc bitdepth
> +function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
> ldrsh r1, [r0]
> ldr r2, =0x20
> + .if \bitdepth == 8
> + ldr r2, =0x20
> + .else
> + ldr r2, =0x8
> + .endif
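This doesn't look quite right: the unconditional "ldr r2, =0x20" just
above is kept, so it is immediately overwritten by the new .if block.
Shouldn't the new block replace/wrap the old ldr instruction, as it
does in the 16x16 version (and all other sizes)?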
> add r1, #1
> asr r1, #1
> add r1, r2
> - asr r1, #6
> + asr r1, #(14 - \bitdepth)
> mov r3, #16
> vdup.16 q8, r1
> vdup.16 q9, r1
> @@ -103,8 +127,9 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1
> 1: subs r3, #1
> vstm r0!, {q8-q15}
> bne 1b
> - bx lr
> + bx lr
> endfunc
> +.endm
>
> .macro sum_sub out, in, c, op
> .ifc \op, +
> @@ -496,8 +521,16 @@ tr_16x4 secondpass_10, 20 - 10
> .ltorg
>
> idct_4x4 8
> +idct_4x4_dc 8
> idct_4x4 10
> +idct_4x4_dc 10
> idct_8x8 8
> +idct_8x8_dc 8
> idct_8x8 10
> +idct_8x8_dc 10
> idct_16x16 8
> +idct_16x16_dc 8
> idct_16x16 10
> +idct_16x16_dc 10
> +idct_32x32_dc 8
> +idct_32x32_dc 10
> diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
> index b65e2e9..febbcc1 100644
> --- a/libavcodec/arm/hevcdsp_init_arm.c
> +++ b/libavcodec/arm/hevcdsp_init_arm.c
> @@ -25,13 +25,18 @@
>
> #include "libavcodec/hevcdsp.h"
>
> -void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
> void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
> -void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
> void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
> -void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
> void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
> void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
> +void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
> +void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
> +void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
> +void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
> +
> +void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
> +void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
> +void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
> void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
> void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
> void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
> @@ -51,9 +56,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
> c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
> }
> if (bit_depth == 10) {
> - c->idct[0] = ff_hevc_idct_4x4_10_neon;
> - c->idct[1] = ff_hevc_idct_8x8_10_neon;
> - c->idct[2] = ff_hevc_idct_16x16_10_neon;
> + c->idct[0] = ff_hevc_idct_4x4_10_neon;
> + c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
> + c->idct[1] = ff_hevc_idct_8x8_10_neon;
> + c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon;
> + c->idct[2] = ff_hevc_idct_16x16_10_neon;
> + c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon;
> + c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon;
> }
> }
> }
> --
> 2.10.2
>
> _______________________________________________
> libav-devel mailing list
> [email protected]
> https://lists.libav.org/mailman/listinfo/libav-devel
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel