Re: [libav-devel] [PATCH v2] dsputil: x86: Convert mpeg4 qpel and dsputil avg to yasm

Diego Biurrun Sat, 26 Jan 2013 00:22:47 -0800

On Sat, Jan 26, 2013 at 12:32:16AM -0500, Daniel Kang wrote:
> --- a/libavcodec/x86/dsputil.asm
> +++ b/libavcodec/x86/dsputil.asm
> @@ -879,3 +884,984 @@ cglobal avg_pixels16, 4,5,4
> +
> +; HPEL mmxext
> +%macro PAVGB_OP 2
> +%if cpuflag(3dnow)
> +    pavgusb %1, %2
> +%else
> +    pavgb   %1, %2
> +%endif
> +%endmacro


We have a macro for this in x86util.asm and it works the other way around.
I'm very suspicious of this doing the right thing on CPUs with mmxext and
3dnow ...

> +; mpeg4 qpel
> +
> +%macro MPEG4_QPEL16_H_LOWPASS 1
> +cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 8

So it seems like dsputil.asm is becoming the new dumping ground for
functions of all kinds.  It doubles in size after your patch and at
around 2k lines it starts to work against our current efforts of
splitting dsputil into sensibly-sized pieces.  If you continue your
porting efforts, it will probably end up around 5k lines or so.

Whenever an opportunity to make dsputil less monolithic comes
up, we should exploit it.  That seems to be the case here.

> +%macro QPEL_V_LOW 5
> +    paddw      m0, m1
> +    mova       m4, [pw_20]
> +    pmullw     m4, m0
> +    mova       m0, %4
> +    mova       m5, %1
> +    paddw      m5, m0
> +    psubw      m4, m5
> +    mova       m5, %2
> +    mova       m6, %3
> +    paddw      m5, m3
> +    paddw      m6, m2
> +    paddw      m6, m6
> +    psubw      m5, m6
> +    pmullw     m5, [pw_3]
> +    paddw      m4, [PW_ROUND]
> +    paddw      m5, m4
> +    psraw      m5, 5
> +    packuswb   m5, m5
> +    OP_MOV     %5, m5, m7
> +    SWAP 0,1,2,3
> +%endmacro

nit: SWAP is not special; format its arguments like the rest of the
macro instructions.

> --- a/libavcodec/x86/dsputil_avg_template.c
> +++ b/libavcodec/x86/dsputil_avg_template.c
> @@ -24,781 +24,32 @@
>  //FIXME the following could be optimized too ...
> +static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t 
> *pixels, int line_size, int h){
> +    DEF(ff_put_no_rnd_pixels8_x2)(block  , pixels  , line_size, h);
> +    DEF(ff_put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
>  }
> +static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, 
> int line_size, int h){
> +    DEF(ff_put_pixels8_y2)(block  , pixels  , line_size, h);
> +    DEF(ff_put_pixels8_y2)(block+8, pixels+8, line_size, h);
>  }
> +static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t 
> *pixels, int line_size, int h){
> +    DEF(ff_put_no_rnd_pixels8_y2)(block  , pixels  , line_size, h);
> +    DEF(ff_put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
>  }
> +static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels, int 
> line_size, int h){
> +    DEF(ff_avg_pixels8)(block  , pixels  , line_size, h);
> +    DEF(ff_avg_pixels8)(block+8, pixels+8, line_size, h);
>  }
> +static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, 
> int line_size, int h){
> +    DEF(ff_avg_pixels8_x2)(block  , pixels  , line_size, h);
> +    DEF(ff_avg_pixels8_x2)(block+8, pixels+8, line_size, h);
>  }
> +static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, 
> int line_size, int h){
> +    DEF(ff_avg_pixels8_y2)(block  , pixels  , line_size, h);
> +    DEF(ff_avg_pixels8_y2)(block+8, pixels+8, line_size, h);
>  }
> +static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, 
> int line_size, int h){
> +    DEF(ff_avg_pixels8_xy2)(block  , pixels  , line_size, h);
> +    DEF(ff_avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
>  }

If you feel motivated, you could fix the formatting as you are changing
all lines anyway.

> --- a/libavcodec/x86/dsputil_mmx.c
> +++ b/libavcodec/x86/dsputil_mmx.c
> @@ -80,6 +80,143 @@ DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_FE)   = { 
> 0xFEFEFEFEFEFEFEFEULL, 0xFEF
> +
> +#if HAVE_YASM
> +/* VC-1-specific */
> +#define ff_put_pixels8_mmx ff_put_pixels8_mmxext
> +void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
> +                               int stride, int rnd)
> +{
> +    ff_put_pixels8_mmx(dst, src, stride, 8);
> +}
> +
> +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
> +                                  int stride, int rnd)
> +{
> +    ff_avg_pixels8_mmxext(dst, src, stride, 8);
> +}
> +
> +
> +/***********************************/
> +/* 3Dnow specific */
> +
> +#define DEF(x) x ## _3dnow
> +
> +#include "dsputil_avg_template.c"
> +
> +#undef DEF
> +
> +/***********************************/
> +/* MMXEXT specific */
> +
> +#define DEF(x) x ## _mmxext
> +
> +#include "dsputil_avg_template.c"
> +
> +#undef DEF
> +
> +#endif /* HAVE_YASM */

Please keep these blocks where they are for now to make the patch
more readable.  We can move them around later.

Diego
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH v2] dsputil: x86: Convert mpeg4 qpel and dsputil avg to yasm

Reply via email to