On Thu, Mar 24, 2011 at 03:34:32PM +0000, Mans Rullgard wrote:
> ---
>  libavcodec/ac3dsp.c              |    2 +
>  libavcodec/ac3dsp.h              |    1 +
>  libavcodec/arm/Makefile          |    3 ++
>  libavcodec/arm/ac3dsp_init_arm.c |   35 ++++++++++++++++++++++
>  libavcodec/arm/ac3dsp_neon.S     |   59 ++++++++++++++++++++++++++++++++++++++
>  5 files changed, 100 insertions(+), 0 deletions(-)
>  create mode 100644 libavcodec/arm/ac3dsp_init_arm.c
>  create mode 100644 libavcodec/arm/ac3dsp_neon.S
> 
[...]
> diff --git a/libavcodec/arm/ac3dsp_neon.S b/libavcodec/arm/ac3dsp_neon.S
> new file mode 100644
> index 0000000..e69a3c2
> --- /dev/null
> +++ b/libavcodec/arm/ac3dsp_neon.S
> @@ -0,0 +1,59 @@
> +/*
> + * Copyright (c) 2011 Mans Rullgard <[email protected]>
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "asm.S"
> +
> +function ff_ac3_max_msb_abs_int16_neon, export=1
> +        vmov.i16        q0,  #0
> +        vmov.i16        q2,  #0
> +1:      vld1.16         {q1},     [r0,:128]!
> +        vabs.s16        q1,  q1
> +        vld1.16         {q3},     [r0,:128]!
> +        vabs.s16        q3,  q3
> +        vorr            q0,  q0,  q1
> +        vorr            q2,  q2,  q3
> +        subs            r1,  r1,  #16
> +        bgt             1b
> +        vorr            q0,  q0,  q2
> +        vorr            d0,  d0,  d1
> +        vpmax.u16       d0,  d0,  d0
> +        vpmax.u16       d0,  d0,  d0
> +        vmov.u16        r0,  d0[0]
> +        bx              lr
> +endfunc

This seems to be slightly incorrect: the absolute values are accumulated with
vorr rather than a true maximum, so with, say, 2 and 1 as input the result
will be 3 instead of 2.
Does that affect anything?

> +function ff_ac3_exponent_min_neon, export=1
> +        cmp             r1,  #0
> +        bxeq            lr
> +        push            {lr}
> +        mov             r12, #256
> +1:
> +        vld1.8          {q0},     [r0,:128]
> +        mov             lr,  r1
> +        add             r3,  r0,  #256
> +2:      vld1.8          {q1},     [r3,:128], r12
> +        subs            lr,  lr,  #1
> +        vmin.u8         q0,  q0,  q1
> +        bgt             2b
> +        subs            r2,  r2,  #16
> +        vst1.8          {q0},     [r0,:128]!
> +        bgt             1b
> +        pop             {pc}
> +endfunc

Nice hack; otherwise this looks OK.
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to