On Mon, May 21, 2012 at 01:06:51PM -0400, Justin Ruggles wrote:
> Move vector_fmul() from DSPContext to AVFloatDSPContext.
> Some ppc and arm files with utility functions and macros had to also be
> moved to libavutil.
> ---
> The arm and ppc parts are completely untested. I'll need some help with
> that since I do not have access to any arm or ppc systems.
> 
> All the other float utility functions in DSPContext will be moved as well.
> I just decided to start with vector_fmul() because it has the most use.
> Also note that I left out the 3DNow version. It's the same speed as SSE
> on my Athlon64, so I didn't see any need to keep it.

Hmm, should we really leave those K6-* processors behind? They support
3DNow! but not SSE, so without the 3DNow version they fall back to plain C.

> --- /dev/null
> +++ b/libavutil/arm/float_dsp_arm.h
> @@ -0,0 +1,29 @@
> +
> +#ifndef AVUTIL_ARM_FLOAT_DSP_ARM_H
> +#define AVUTIL_ARM_FLOAT_DSP_ARM_H
> +
> +#include "libavutil/float_dsp.h"
> +
> +void ff_dsputil_init_vfp (AVFloatDSPContext *fdsp);
> +void ff_dsputil_init_neon(AVFloatDSPContext *fdsp);

nit: no space between function name and '('

> --- /dev/null
> +++ b/libavutil/arm/float_dsp_init_arm.c
> @@ -0,0 +1,32 @@
> +
> +void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp)
> +{
> +    int cpu_flags = av_get_cpu_flags();
> +
> +    if (have_vfp(cpu_flags))  ff_dsputil_init_vfp(fdsp);
> +    if (have_neon(cpu_flags)) ff_dsputil_init_neon(fdsp);

nit: break the lines, i.e. put each init call on its own line below its if

> --- /dev/null
> +++ b/libavutil/arm/float_dsp_init_neon.c
> @@ -0,0 +1,31 @@
> +
> +void ff_dsputil_init_neon(AVFloatDSPContext *fdsp)
> +{
> +    c->vector_fmul                = ff_vector_fmul_neon;

nit: weird spacing; the long run of padding before '=' serves no purpose
with a single assignment

> --- /dev/null
> +++ b/libavutil/float_dsp.c
> @@ -0,0 +1,42 @@
> +
> +void av_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
> +{
> +    fdsp->vector_fmul = vector_fmul_c;
> +
> +#if ARCH_ARM
> +    ff_float_dsp_init_arm(fdsp);
> +#elif HAVE_ALTIVEC
> +    ff_float_dsp_init_ppc(fdsp, bit_exact);
> +#elif ARCH_X86
> +    ff_float_dsp_init_x86(fdsp);
> +#endif

This should not check for HAVE_ALTIVEC, but for ARCH_PPC instead.
The ppc init code should then decide what optimized functions to
enable depending on the arch-specific optimizations available.

> --- /dev/null
> +++ b/libavutil/ppc/float_dsp_altivec.c
> @@ -0,0 +1,44 @@
> +
> +static void vector_fmul_altivec(float *dst, const float *src0,
> +                                const float *src1, int len)
> +{
> +    int i;
> +    vector float d0, d1, s, zero = (vector float)vec_splat_u32(0);
> +    for (i = 0; i < len - 7; i += 8) {
> +        d0 = vec_ld( 0, src0 + i);
> +        s  = vec_ld( 0, src1 + i);
> +        d1 = vec_ld(16, src0 + i);
> +        d0 = vec_madd(d0, s, zero);
> +        d1 = vec_madd(d1, vec_ld(16, src1 + i), zero);
> +        vec_st(d0,  0, dst + i);
> +        vec_st(d1, 16, dst + i);
> +    }
> +}
> +
> +void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
> +{
> +    fdsp->vector_fmul = vector_fmul_altivec;
> +}

Splitting this file into an init part and an altivec part should do
the trick.

Diego
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to