On Mon, Apr 17, 2017 at 6:25 PM, Alexandra Hájková
<[email protected]> wrote:
> ---
>
> Indent operands.
>
>  libavcodec/arm/hevc_idct.S        | 51 ++++++++++++++++++++++++++++++++-------
>  libavcodec/arm/hevcdsp_init_arm.c | 21 +++++++++++-----
>  2 files changed, 57 insertions(+), 15 deletions(-)
>
> diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
> index 7fdd7cc..f949d80 100644
> --- a/libavcodec/arm/hevc_idct.S
> +++ b/libavcodec/arm/hevc_idct.S
> @@ -30,26 +30,37 @@ const trans, align=4
>          .short 57, 43, 25, 9
>  endconst
>
> -function ff_hevc_idct_4x4_dc_8_neon, export=1
> +.macro idct_4x4_dc bitdepth
> +function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
>          ldrsh           r1, [r0]
> +    .if \bitdepth == 8
>          ldr             r2, =0x20
> +    .else
> +        ldr             r2, =0x8
> +    .endif
>          add             r1, #1
>          asr             r1, #1
>          add             r1, r2
> -        asr             r1, #6
> +        asr             r1, #(14 - \bitdepth)
>          vdup.16         q0, r1
>          vdup.16         q1, r1
>          vst1.16         {q0, q1}, [r0]
>          bx              lr
>  endfunc
> +.endm
>
> -function ff_hevc_idct_8x8_dc_8_neon, export=1
> +.macro idct_8x8_dc bitdepth
> +function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1
>          ldrsh           r1, [r0]
> +    .if \bitdepth == 8
>          ldr             r2, =0x20
> +    .else
> +        ldr             r2, =0x8
> +    .endif
>          add             r1, #1
>          asr             r1, #1
>          add             r1, r2
> -        asr             r1, #6
> +        asr             r1, #(14 - \bitdepth)
>          vdup.16         q8, r1
>          vdup.16         q9, r1
>          vmov.16         q10, q8
> @@ -61,14 +72,20 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1
>          vstm            r0, {q8-q15}
>          bx              lr
>  endfunc
> +.endm
>
> -function ff_hevc_idct_16x16_dc_8_neon, export=1
> +.macro idct_16x16_dc bitdepth
> +function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1
>          ldrsh           r1, [r0]
> +    .if \bitdepth == 8
>          ldr             r2, =0x20
> +    .else
> +        ldr             r2, =0x8
> +    .endif
>          add             r1, #1
>          asr             r1, #1
>          add             r1, r2
> -        asr             r1, #6
> +        asr             r1, #(14 - \bitdepth)
>          vdup.16         q8, r1
>          vdup.16         q9, r1
>          vmov.16         q10, q8
> @@ -83,14 +100,21 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1
>          vstm            r0, {q8-q15}
>          bx              lr
>  endfunc
> +.endm
>
> -function ff_hevc_idct_32x32_dc_8_neon, export=1
> +.macro idct_32x32_dc bitdepth
> +function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
>          ldrsh           r1, [r0]
>          ldr             r2, =0x20
> +    .if \bitdepth == 8
> +        ldr             r2, =0x20
> +    .else
> +        ldr             r2, =0x8
> +    .endif

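This doesn't look quite right. Shouldn't the new block replace/wrap
the old ldr instruction, as it does in the 16x16 version (and all
other sizes)? As posted, the unconditional "ldr r2, =0x20" is kept as
a context line and is then immediately overwritten by the conditional
load that follows it.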

>          add             r1, #1
>          asr             r1, #1
>          add             r1, r2
> -        asr             r1, #6
> +        asr             r1, #(14 - \bitdepth)
>          mov             r3, #16
>          vdup.16         q8, r1
>          vdup.16         q9, r1
> @@ -103,8 +127,9 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1
>  1:      subs            r3, #1
>          vstm            r0!, {q8-q15}
>          bne             1b
> -        bx lr
> +        bx              lr
>  endfunc
> +.endm
>
>  .macro sum_sub out, in, c, op
>    .ifc \op, +
> @@ -496,8 +521,16 @@ tr_16x4 secondpass_10, 20 - 10
>  .ltorg
>
>  idct_4x4 8
> +idct_4x4_dc 8
>  idct_4x4 10
> +idct_4x4_dc 10
>  idct_8x8 8
> +idct_8x8_dc 8
>  idct_8x8 10
> +idct_8x8_dc 10
>  idct_16x16 8
> +idct_16x16_dc 8
>  idct_16x16 10
> +idct_16x16_dc 10
> +idct_32x32_dc 8
> +idct_32x32_dc 10
> diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
> index b65e2e9..febbcc1 100644
> --- a/libavcodec/arm/hevcdsp_init_arm.c
> +++ b/libavcodec/arm/hevcdsp_init_arm.c
> @@ -25,13 +25,18 @@
>
>  #include "libavcodec/hevcdsp.h"
>
> -void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
>  void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
> -void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
>  void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
> -void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
>  void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
>  void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
> +void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
> +void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
> +void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
> +void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
> +
> +void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
> +void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
> +void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
>  void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
>  void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
>  void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
> @@ -51,9 +56,13 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
>              c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
>          }
>          if (bit_depth == 10) {
> -            c->idct[0] = ff_hevc_idct_4x4_10_neon;
> -            c->idct[1] = ff_hevc_idct_8x8_10_neon;
> -            c->idct[2] = ff_hevc_idct_16x16_10_neon;
> +            c->idct[0]    = ff_hevc_idct_4x4_10_neon;
> +            c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
> +            c->idct[1]    = ff_hevc_idct_8x8_10_neon;
> +            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon;
> +            c->idct[2]    = ff_hevc_idct_16x16_10_neon;
> +            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon;
> +            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon;
>          }
>      }
>  }
> --
> 2.10.2
>
> _______________________________________________
> libav-devel mailing list
> [email protected]
> https://lists.libav.org/mailman/listinfo/libav-devel
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to