Janne Grunau <janne-li...@jannau.net> writes:

> diff --git a/libavcodec/arm/rv34dsp_neon.S b/libavcodec/arm/rv34dsp_neon.S
> index 1e8d4b4..715c242 100644
> --- a/libavcodec/arm/rv34dsp_neon.S
> +++ b/libavcodec/arm/rv34dsp_neon.S
> @@ -21,11 +21,11 @@
>  #include "asm.S"
>
>  .macro rv34_inv_transform
> -        mov             r1,  #16
> -        vld1.16         {d28}, [r0,:64], r1     @ block[i+8*0]
> -        vld1.16         {d29}, [r0,:64], r1     @ block[i+8*1]
> -        vld1.16         {d30}, [r0,:64], r1     @ block[i+8*2]
> -        vld1.16         {d31}, [r0,:64], r1     @ block[i+8*3]
> +        mov             r1,  #8
> +        vld1.16         {d28}, [r0,:64], r1     @ block[i+4*0]
> +        vld1.16         {d29}, [r0,:64], r1     @ block[i+4*1]
> +        vld1.16         {d30}, [r0,:64], r1     @ block[i+4*2]
> +        vld1.16         {d31}, [r0,:64], r1     @ block[i+4*3]

All of that can be replaced with a single vld1.16 {q14-q15}, [r0,:128].
This leaves r0 unchanged, so the code that copies/restores the old value
(I forgot how it's done) can also be removed.


[...]

>  /* void rv34_inv_transform_dc_noround_c(DCTELEM *block) */
>  function ff_rv34_inv_transform_noround_dc_neon, export=1
>          vld1.16         {d28[]}, [r0,:16]       @ block[0]
>          vmov.i16        d4,  #251
> -        vorr.s16        d4,  #256               @ 13^2 * 3
> +        vorr.s16        d4,  #256               @ 13^2 *

Accident?  The trailing "3" was dropped from the comment.

> -        mov             r1,  #16
> +        mov             r1,  #8
>          vmull.s16       q3,  d28, d4
>          vshrn.s32       d0,  q3,  #11
>          vst1.64         {d0}, [r0,:64], r1

Copy d0 to d1 and use vst1.16 {q0}, [r0,:128]! twice instead.

-- 
Måns Rullgård
m...@mansr.com
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to