On Sat, Jan 26, 2013 at 3:23 AM, Diego Biurrun <[email protected]> wrote:
> On Sat, Jan 26, 2013 at 12:32:16AM -0500, Daniel Kang wrote:
>> --- a/libavcodec/x86/dsputil.asm
>> +++ b/libavcodec/x86/dsputil.asm
>> @@ -879,3 +884,984 @@ cglobal avg_pixels16, 4,5,4
>> +
>> +; HPEL mmxext
>> +%macro PAVGB_OP 2
>> +%if cpuflag(3dnow)
>> + pavgusb %1, %2
>> +%else
>> + pavgb %1, %2
>> +%endif
>> +%endmacro
>
> We have a macro for this in x86util.asm and it works the other way around.
> I'm very suspicious of this doing the right thing on CPUs with mmxext and
> 3dnow ...
You're probably right. Fixed.
>> +; mpeg4 qpel
>> +
>> +%macro MPEG4_QPEL16_H_LOWPASS 1
>> +cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 8
>
> So it seems like dsputil.asm is becoming the new dumping ground for
> functions of all kind. It doubles in size after your patch and at
> around 2k lines it starts to work against our current efforts of
> splitting dsputil into sensibly-sized pieces. If you continue your
> porting efforts, it will probably end up around 5k lines or so.
>
> Whenever there is an opportunity to make dsputil less monolithic comes
> up, we should exploit it. That seems to be the case here.
I was trying to avoid drama and bikeshedding re: file names and save
that for another patch. I guess I could split it in this patch if you
want.
>> +%macro QPEL_V_LOW 5
>> + paddw m0, m1
>> + mova m4, [pw_20]
>> + pmullw m4, m0
>> + mova m0, %4
>> + mova m5, %1
>> + paddw m5, m0
>> + psubw m4, m5
>> + mova m5, %2
>> + mova m6, %3
>> + paddw m5, m3
>> + paddw m6, m2
>> + paddw m6, m6
>> + psubw m5, m6
>> + pmullw m5, [pw_3]
>> + paddw m4, [PW_ROUND]
>> + paddw m5, m4
>> + psraw m5, 5
>> + packuswb m5, m5
>> + OP_MOV %5, m5, m7
>> + SWAP 0,1,2,3
>> +%endmacro
>
> nit: SWAP is not special, format its arguments like the rest of the
> macro instructions.
I disagree on this one, I think SWAP is special.
>> --- a/libavcodec/x86/dsputil_avg_template.c
>> +++ b/libavcodec/x86/dsputil_avg_template.c
>> @@ -24,781 +24,32 @@
>> //FIXME the following could be optimized too ...
>> +static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t
>> *pixels, int line_size, int h){
>> + DEF(ff_put_no_rnd_pixels8_x2)(block , pixels , line_size, h);
>> + DEF(ff_put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
>> }
>> +static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
>> int line_size, int h){
>> + DEF(ff_put_pixels8_y2)(block , pixels , line_size, h);
>> + DEF(ff_put_pixels8_y2)(block+8, pixels+8, line_size, h);
>> }
>> +static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t
>> *pixels, int line_size, int h){
>> + DEF(ff_put_no_rnd_pixels8_y2)(block , pixels , line_size, h);
>> + DEF(ff_put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
>> }
>> +static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels, int
>> line_size, int h){
>> + DEF(ff_avg_pixels8)(block , pixels , line_size, h);
>> + DEF(ff_avg_pixels8)(block+8, pixels+8, line_size, h);
>> }
>> +static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels,
>> int line_size, int h){
>> + DEF(ff_avg_pixels8_x2)(block , pixels , line_size, h);
>> + DEF(ff_avg_pixels8_x2)(block+8, pixels+8, line_size, h);
>> }
>> +static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
>> int line_size, int h){
>> + DEF(ff_avg_pixels8_y2)(block , pixels , line_size, h);
>> + DEF(ff_avg_pixels8_y2)(block+8, pixels+8, line_size, h);
>> }
>> +static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels,
>> int line_size, int h){
>> + DEF(ff_avg_pixels8_xy2)(block , pixels , line_size, h);
>> + DEF(ff_avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
>> }
>
> If you feel motivated, you could fix the formatting as you are changing
> all lines anyway.
Fixed.
>> --- a/libavcodec/x86/dsputil_mmx.c
>> +++ b/libavcodec/x86/dsputil_mmx.c
>> @@ -80,6 +80,143 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE) = {
>> 0xFEFEFEFEFEFEFEFEULL, 0xFEF
>> +
>> +#if HAVE_YASM
>> +/* VC-1-specific */
>> +#define ff_put_pixels8_mmx ff_put_pixels8_mmxext
>> +void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
>> + int stride, int rnd)
>> +{
>> + ff_put_pixels8_mmx(dst, src, stride, 8);
>> +}
>> +
>> +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
>> + int stride, int rnd)
>> +{
>> + ff_avg_pixels8_mmxext(dst, src, stride, 8);
>> +}
>> +
>> +
>> +/***********************************/
>> +/* 3Dnow specific */
>> +
>> +#define DEF(x) x ## _3dnow
>> +
>> +#include "dsputil_avg_template.c"
>> +
>> +#undef DEF
>> +
>> +/***********************************/
>> +/* MMXEXT specific */
>> +
>> +#define DEF(x) x ## _mmxext
>> +
>> +#include "dsputil_avg_template.c"
>> +
>> +#undef DEF
>> +
>> +#endif /* HAVE_YASM */
>
> Please keep these blocks where they are for now to make the patch
> more readable. We can move them around later.
Fixed.
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel