On Sat, Jan 26, 2013 at 12:32:16AM -0500, Daniel Kang wrote:
> --- a/libavcodec/x86/dsputil.asm
> +++ b/libavcodec/x86/dsputil.asm
> @@ -879,3 +884,984 @@ cglobal avg_pixels16, 4,5,4
> +
> +; HPEL mmxext
> +%macro PAVGB_OP 2
> +%if cpuflag(3dnow)
> + pavgusb %1, %2
> +%else
> + pavgb %1, %2
> +%endif
> +%endmacro
We have a macro for this in x86util.asm and it works the other way around.
I'm very suspicious of this doing the right thing on CPUs with mmxext and
3dnow ...
> +; mpeg4 qpel
> +
> +%macro MPEG4_QPEL16_H_LOWPASS 1
> +cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 8
So it seems like dsputil.asm is becoming the new dumping ground for
functions of all kind. It doubles in size after your patch and at
around 2k lines it starts to work against our current efforts of
splitting dsputil into sensibly-sized pieces. If you continue your
porting efforts, it will probably end up around 5k lines or so.
Whenever there is an opportunity to make dsputil less monolithic comes
up, we should exploit it. That seems to be the case here.
> +%macro QPEL_V_LOW 5
> + paddw m0, m1
> + mova m4, [pw_20]
> + pmullw m4, m0
> + mova m0, %4
> + mova m5, %1
> + paddw m5, m0
> + psubw m4, m5
> + mova m5, %2
> + mova m6, %3
> + paddw m5, m3
> + paddw m6, m2
> + paddw m6, m6
> + psubw m5, m6
> + pmullw m5, [pw_3]
> + paddw m4, [PW_ROUND]
> + paddw m5, m4
> + psraw m5, 5
> + packuswb m5, m5
> + OP_MOV %5, m5, m7
> + SWAP 0,1,2,3
> +%endmacro
nit: SWAP is not special, format its arguments like the rest of the
macro instructions.
> --- a/libavcodec/x86/dsputil_avg_template.c
> +++ b/libavcodec/x86/dsputil_avg_template.c
> @@ -24,781 +24,32 @@
> //FIXME the following could be optimized too ...
> +static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t
> *pixels, int line_size, int h){
> + DEF(ff_put_no_rnd_pixels8_x2)(block , pixels , line_size, h);
> + DEF(ff_put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
> }
> +static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
> int line_size, int h){
> + DEF(ff_put_pixels8_y2)(block , pixels , line_size, h);
> + DEF(ff_put_pixels8_y2)(block+8, pixels+8, line_size, h);
> }
> +static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t
> *pixels, int line_size, int h){
> + DEF(ff_put_no_rnd_pixels8_y2)(block , pixels , line_size, h);
> + DEF(ff_put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
> }
> +static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels, int
> line_size, int h){
> + DEF(ff_avg_pixels8)(block , pixels , line_size, h);
> + DEF(ff_avg_pixels8)(block+8, pixels+8, line_size, h);
> }
> +static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels,
> int line_size, int h){
> + DEF(ff_avg_pixels8_x2)(block , pixels , line_size, h);
> + DEF(ff_avg_pixels8_x2)(block+8, pixels+8, line_size, h);
> }
> +static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
> int line_size, int h){
> + DEF(ff_avg_pixels8_y2)(block , pixels , line_size, h);
> + DEF(ff_avg_pixels8_y2)(block+8, pixels+8, line_size, h);
> }
> +static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels,
> int line_size, int h){
> + DEF(ff_avg_pixels8_xy2)(block , pixels , line_size, h);
> + DEF(ff_avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
> }
If you feel motivated, you could fix the formatting as you are changing
all lines anyway.
> --- a/libavcodec/x86/dsputil_mmx.c
> +++ b/libavcodec/x86/dsputil_mmx.c
> @@ -80,6 +80,143 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE) = {
> 0xFEFEFEFEFEFEFEFEULL, 0xFEF
> +
> +#if HAVE_YASM
> +/* VC-1-specific */
> +#define ff_put_pixels8_mmx ff_put_pixels8_mmxext
> +void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
> + int stride, int rnd)
> +{
> + ff_put_pixels8_mmx(dst, src, stride, 8);
> +}
> +
> +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
> + int stride, int rnd)
> +{
> + ff_avg_pixels8_mmxext(dst, src, stride, 8);
> +}
> +
> +
> +/***********************************/
> +/* 3Dnow specific */
> +
> +#define DEF(x) x ## _3dnow
> +
> +#include "dsputil_avg_template.c"
> +
> +#undef DEF
> +
> +/***********************************/
> +/* MMXEXT specific */
> +
> +#define DEF(x) x ## _mmxext
> +
> +#include "dsputil_avg_template.c"
> +
> +#undef DEF
> +
> +#endif /* HAVE_YASM */
Please keep these blocks where they are for now to make the patch
more readable. We can move them around later.
Diego
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel