Re: [libav-devel] [PATCH v2] dsputil: x86: Convert mpeg4 qpel and dsputil avg to yasm

Daniel Kang Sat, 26 Jan 2013 10:01:33 -0800

On Sat, Jan 26, 2013 at 3:23 AM, Diego Biurrun <[email protected]> wrote:
> On Sat, Jan 26, 2013 at 12:32:16AM -0500, Daniel Kang wrote:
>> --- a/libavcodec/x86/dsputil.asm
>> +++ b/libavcodec/x86/dsputil.asm
>> @@ -879,3 +884,984 @@ cglobal avg_pixels16, 4,5,4
>> +
>> +; HPEL mmxext
>> +%macro PAVGB_OP 2
>> +%if cpuflag(3dnow)
>> +    pavgusb %1, %2
>> +%else
>> +    pavgb   %1, %2
>> +%endif
>> +%endmacro
>
> We have a macro for this in x86util.asm and it works the other way around.
> I'm very suspicious of this doing the right thing on CPUs with mmxext and
> 3dnow ...


You're probably right. Fixed.

>> +; mpeg4 qpel
>> +
>> +%macro MPEG4_QPEL16_H_LOWPASS 1
>> +cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 8
>
> So it seems like dsputil.asm is becoming the new dumping ground for
> functions of all kind.  It doubles in size after your patch and at
> around 2k lines it starts to work against our current efforts of
> splitting dsputil into sensibly-sized pieces.  If you continue your
> porting efforts, it will probably end up around 5k lines or so.
>
> Whenever there is an opportunity to make dsputil less monolithic comes
> up, we should exploit it.  That seems to be the case here.

I was trying to avoid drama and bikeshedding re: file names and save
that for another patch. I guess I could split it in this patch if you
want.

>> +%macro QPEL_V_LOW 5
>> +    paddw      m0, m1
>> +    mova       m4, [pw_20]
>> +    pmullw     m4, m0
>> +    mova       m0, %4
>> +    mova       m5, %1
>> +    paddw      m5, m0
>> +    psubw      m4, m5
>> +    mova       m5, %2
>> +    mova       m6, %3
>> +    paddw      m5, m3
>> +    paddw      m6, m2
>> +    paddw      m6, m6
>> +    psubw      m5, m6
>> +    pmullw     m5, [pw_3]
>> +    paddw      m4, [PW_ROUND]
>> +    paddw      m5, m4
>> +    psraw      m5, 5
>> +    packuswb   m5, m5
>> +    OP_MOV     %5, m5, m7
>> +    SWAP 0,1,2,3
>> +%endmacro
>
> nit: SWAP is not special, format its arguments like the rest of the
> macro instructions.

I disagree on this one, I think SWAP is special.

>> --- a/libavcodec/x86/dsputil_avg_template.c
>> +++ b/libavcodec/x86/dsputil_avg_template.c
>> @@ -24,781 +24,32 @@
>>  //FIXME the following could be optimized too ...
>> +static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t 
>> *pixels, int line_size, int h){
>> +    DEF(ff_put_no_rnd_pixels8_x2)(block  , pixels  , line_size, h);
>> +    DEF(ff_put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
>>  }
>> +static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, 
>> int line_size, int h){
>> +    DEF(ff_put_pixels8_y2)(block  , pixels  , line_size, h);
>> +    DEF(ff_put_pixels8_y2)(block+8, pixels+8, line_size, h);
>>  }
>> +static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t 
>> *pixels, int line_size, int h){
>> +    DEF(ff_put_no_rnd_pixels8_y2)(block  , pixels  , line_size, h);
>> +    DEF(ff_put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
>>  }
>> +static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels, int 
>> line_size, int h){
>> +    DEF(ff_avg_pixels8)(block  , pixels  , line_size, h);
>> +    DEF(ff_avg_pixels8)(block+8, pixels+8, line_size, h);
>>  }
>> +static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, 
>> int line_size, int h){
>> +    DEF(ff_avg_pixels8_x2)(block  , pixels  , line_size, h);
>> +    DEF(ff_avg_pixels8_x2)(block+8, pixels+8, line_size, h);
>>  }
>> +static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, 
>> int line_size, int h){
>> +    DEF(ff_avg_pixels8_y2)(block  , pixels  , line_size, h);
>> +    DEF(ff_avg_pixels8_y2)(block+8, pixels+8, line_size, h);
>>  }
>> +static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, 
>> int line_size, int h){
>> +    DEF(ff_avg_pixels8_xy2)(block  , pixels  , line_size, h);
>> +    DEF(ff_avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
>>  }
>
> If you feel motivated, you could fix the formatting as you are changing
> all lines anyway.

Fixed.

>> --- a/libavcodec/x86/dsputil_mmx.c
>> +++ b/libavcodec/x86/dsputil_mmx.c
>> @@ -80,6 +80,143 @@ DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_FE)   = { 
>> 0xFEFEFEFEFEFEFEFEULL, 0xFEF
>> +
>> +#if HAVE_YASM
>> +/* VC-1-specific */
>> +#define ff_put_pixels8_mmx ff_put_pixels8_mmxext
>> +void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
>> +                               int stride, int rnd)
>> +{
>> +    ff_put_pixels8_mmx(dst, src, stride, 8);
>> +}
>> +
>> +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
>> +                                  int stride, int rnd)
>> +{
>> +    ff_avg_pixels8_mmxext(dst, src, stride, 8);
>> +}
>> +
>> +
>> +/***********************************/
>> +/* 3Dnow specific */
>> +
>> +#define DEF(x) x ## _3dnow
>> +
>> +#include "dsputil_avg_template.c"
>> +
>> +#undef DEF
>> +
>> +/***********************************/
>> +/* MMXEXT specific */
>> +
>> +#define DEF(x) x ## _mmxext
>> +
>> +#include "dsputil_avg_template.c"
>> +
>> +#undef DEF
>> +
>> +#endif /* HAVE_YASM */
>
> Please keep these blocks where they are for now to make the patch
> more readable.  We can move them around later.

Fixed.
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH v2] dsputil: x86: Convert mpeg4 qpel and dsputil avg to yasm

Reply via email to