On Wed, Apr 10, 2013 at 07:24:30PM +0300, Martin Storsjö wrote:
> --- a/libavcodec/x86/dsputil_mmx.c
> +++ b/libavcodec/x86/dsputil_mmx.c
> @@ -270,30 +189,20 @@ void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t
> *dst, uint8_t *src,
> #if HAVE_YASM
>
> /***********************************/
> -/* 3Dnow specific */
> -
> -#define DEF(x) x ## _3dnow
> -
> -#include "dsputil_avg_template.c"
> -
> -#undef DEF
> -
> -/***********************************/
> /* MMXEXT specific */
>
> -#define DEF(x) x ## _mmxext
> -
> -#include "dsputil_avg_template.c"
> -
> -#undef DEF
> +//FIXME the following could be optimized too ...
> +static void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
> + int line_size, int h)
> +{
> + ff_avg_pixels8_mmxext(block, pixels, line_size, h);
> + ff_avg_pixels8_mmxext(block + 8, pixels + 8, line_size, h);
> +}
This is an awkward place for that function and it also uses the
wrong type for the line_size parameter, should be ptrdiff_t.
The comment is also misleading; it should warn that this function is
duplicated.
> diff --git a/libavcodec/x86/dsputil_rnd_template.c
> b/libavcodec/x86/hpeldsp_rnd_template.c
> similarity index 72%
> copy from libavcodec/x86/dsputil_rnd_template.c
> copy to libavcodec/x86/hpeldsp_rnd_template.c
> index 6ce926c..69e9353 100644
> diff --git a/libavcodec/x86/dsputil_rnd_template.c
> b/libavcodec/x86/dsputil_rnd_template.c
> index 6ce926c..d441950 100644
> --- a/libavcodec/x86/dsputil_rnd_template.c
> +++ b/libavcodec/x86/dsputil_rnd_template.c
There now is quite a bit of duplication between these two files:
static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, ptrdiff_t
line_size, int h)
static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, ptrdiff_t
line_size, int h)
static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
All these functions are duplicated.
> --- /dev/null
> +++ b/libavcodec/x86/hpeldsp_init.c
> @@ -0,0 +1,415 @@
> +
> +//#undef NDEBUG
> +//#include <assert.h>
I'd suggest not to carry cruft like this forward.
> +#if HAVE_INLINE_ASM
> +
> +#define JUMPALIGN() __asm__ volatile (".p2align 3"::)
> +#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
> +
> +#define MOVQ_BFE(regd) \
> + __asm__ volatile ( \
> + "pcmpeqd %%"#regd", %%"#regd" \n\t" \
> + "paddb %%"#regd", %%"#regd" \n\t" ::)
> +
> +#ifndef PIC
> +#define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" ::
> "m"(ff_bone))
> +#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" ::
> "m"(ff_wtwo))
> +#else
> +// for shared library it's better to use this way for accessing constants
> +// pcmpeqd -> -1
> +#define MOVQ_BONE(regd) \
> + __asm__ volatile ( \
> + "pcmpeqd %%"#regd", %%"#regd" \n\t" \
> + "psrlw $15, %%"#regd" \n\t" \
> + "packuswb %%"#regd", %%"#regd" \n\t" ::)
> +
> +#define MOVQ_WTWO(regd) \
> + __asm__ volatile ( \
> + "pcmpeqd %%"#regd", %%"#regd" \n\t" \
> + "psrlw $15, %%"#regd" \n\t" \
> + "psllw $1, %%"#regd" \n\t"::)
> +
> +#endif
> +
> +// using regr as temporary and for the output result
> +// first argument is unmodifed and second is trashed
> +// regfe is supposed to contain 0xfefefefefefefefe
> +#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
> + "movq "#rega", "#regr" \n\t" \
> + "pand "#regb", "#regr" \n\t" \
> + "pxor "#rega", "#regb" \n\t" \
> + "pand "#regfe", "#regb" \n\t" \
> + "psrlq $1, "#regb" \n\t" \
> + "paddb "#regb", "#regr" \n\t"
> +
> +#define PAVGB_MMX(rega, regb, regr, regfe) \
> + "movq "#rega", "#regr" \n\t" \
> + "por "#regb", "#regr" \n\t" \
> + "pxor "#rega", "#regb" \n\t" \
> + "pand "#regfe", "#regb" \n\t" \
> + "psrlq $1, "#regb" \n\t" \
> + "psubb "#regb", "#regr" \n\t"
> +
> +// mm6 is supposed to contain 0xfefefefefefefefe
> +#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
> + "movq "#rega", "#regr" \n\t" \
> + "movq "#regc", "#regp" \n\t" \
> + "pand "#regb", "#regr" \n\t" \
> + "pand "#regd", "#regp" \n\t" \
> + "pxor "#rega", "#regb" \n\t" \
> + "pxor "#regc", "#regd" \n\t" \
> + "pand %%mm6, "#regb" \n\t" \
> + "pand %%mm6, "#regd" \n\t" \
> + "psrlq $1, "#regb" \n\t" \
> + "psrlq $1, "#regd" \n\t" \
> + "paddb "#regb", "#regr" \n\t" \
> + "paddb "#regd", "#regp" \n\t"
> +
> +#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
> + "movq "#rega", "#regr" \n\t" \
> + "movq "#regc", "#regp" \n\t" \
> + "por "#regb", "#regr" \n\t" \
> + "por "#regd", "#regp" \n\t" \
> + "pxor "#rega", "#regb" \n\t" \
> + "pxor "#regc", "#regd" \n\t" \
> + "pand %%mm6, "#regb" \n\t" \
> + "pand %%mm6, "#regd" \n\t" \
> + "psrlq $1, "#regd" \n\t" \
> + "psrlq $1, "#regb" \n\t" \
> + "psubb "#regb", "#regr" \n\t" \
> + "psubb "#regd", "#regp" \n\t"
All of this is duplicated from dsputil_mmx.c. It could be shared through
dsputil_mmx.h for lack of a better place.
> +static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int mm_flags)
> +{
> +#if HAVE_YASM
> + c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
> + c->put_pixels_tab[0][2] = ff_put_pixels16_y2_mmxext;
> +
> + c->avg_pixels_tab[0][0] = ff_avg_pixels16_mmxext;
> + c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_mmxext;
> + c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_mmxext;
> +
> + c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
> + c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
> +
> + c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
> + c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
> + c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
> +
> + if (!(flags & CODEC_FLAG_BITEXACT)) {
> + c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_mmxext;
> + c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_mmxext;
> + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
> + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
> +
> + c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_mmxext;
> + c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
> + }
> +#endif /* HAVE_YASM */
> +
> +#if HAVE_MMXEXT_EXTERNAL
> + if (flags & CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
> + c->put_no_rnd_pixels_tab[1][1] =
> ff_put_no_rnd_pixels8_x2_exact_mmxext;
> + c->put_no_rnd_pixels_tab[1][2] =
> ff_put_no_rnd_pixels8_y2_exact_mmxext;
> + }
> +#endif /* HAVE_MMXEXT_EXTERNAL */
> +}
Just HAVE_YASM around the function body is enough.
> +static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int mm_flags)
> +{
> +#if HAVE_SSE2_EXTERNAL
> + if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
> + // these functions are slower than mmx on AMD, but faster on Intel
> + c->put_pixels_tab[0][0] = ff_put_pixels16_sse2;
> + c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
> + c->avg_pixels_tab[0][0] = ff_avg_pixels16_sse2;
> + }
> +#endif /* HAVE_SSE2_EXTERNAL */
> +}
same
Diego
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel