On Wed, Feb 13, 2013 at 05:53:36PM -0500, Daniel Kang wrote:
>
> --- a/libavcodec/x86/cavsdsp.c
> +++ b/libavcodec/x86/cavsdsp.c
> @@ -475,12 +481,18 @@ CAVS_MC(put_, 8, 3dnow)
> CAVS_MC(put_, 16,3dnow)
> CAVS_MC(avg_, 8, 3dnow)
> CAVS_MC(avg_, 16,3dnow)
> +#endif /* HAVE_AMD3DNOW_INLINE */
>
> static av_cold void ff_cavsdsp_init_3dnow(CAVSDSPContext *c,
> AVCodecContext *avctx)
> {
> +#if HAVE_YASM
> + c->put_cavs_qpel_pixels_tab[0][0] = ff_put_cavs_qpel16_mc00_mmxext;
> + c->put_cavs_qpel_pixels_tab[1][0] = ff_put_cavs_qpel16_mc00_mmxext;
> +#endif
>
> +#if HAVE_INLINE_ASM
> #define dspfunc(PFX, IDX, NUM) \
> - c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \
> c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
> c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
mmxext functions in the 3dnow init function?
> --- a/libavcodec/x86/dsputil_mmx.c
> +++ b/libavcodec/x86/dsputil_mmx.c
> @@ -128,26 +136,45 @@ void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t
> *block,
> void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t
> line_size, int h);
> static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
> - int line_size, int h)
> + ptrdiff_t line_size, int h)
Is there a reason not to do this separately, i.e. right away?
> @@ -1291,31 +1176,29 @@ void ff_put_pixels16_sse2(uint8_t *block, const
> uint8_t *pixels,
>
> -#if HAVE_INLINE_ASM
> +#if HAVE_YASM
>
> /* CAVS-specific */
> void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
> {
> - put_pixels8_mmx(dst, src, stride, 8);
> + ff_put_pixels8_mmx(dst, src, stride, 8);
> }
>
> void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
> {
> - avg_pixels8_mmx(dst, src, stride, 8);
> + ff_avg_pixels8_mmx(dst, src, stride, 8);
> }
>
> void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
> {
> - put_pixels16_mmx(dst, src, stride, 16);
> + ff_put_pixels16_mmx(dst, src, stride, 16);
> }
>
> void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
> {
> - avg_pixels16_mmx(dst, src, stride, 16);
> + ff_avg_pixels16_mmx(dst, src, stride, 16);
> }
> -#endif /* HAVE_INLINE_ASM */
NOTE (to self): Now that this no longer relies on static functions,
it can be moved out of the dsputil_mmx file into the codec-specific
init files.
> --- a/libavcodec/x86/dsputil_rnd_template.c
> +++ b/libavcodec/x86/dsputil_rnd_template.c
> @@ -25,570 +25,28 @@
>
> //FIXME optimize
> -static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h){
> - DEF(put, pixels8_y2)(block , pixels , line_size, h);
> - DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
> +static void DEF(ff_put, pixels16_y2)(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h){
> + DEF(ff_put, pixels8_y2)(block , pixels , line_size, h);
> + DEF(ff_put, pixels8_y2)(block+8, pixels+8, line_size, h);
> }
Is the FIXME comment still valid in some way?
Please prettyprint those lines that you are changing anyway, I think
I gave you an sed expression to do it automatically the last time.
It should still work and/or be easy to adopt.
> --- a/libavcodec/x86/hpeldsp.asm
> +++ b/libavcodec/x86/hpeldsp.asm
> @@ -22,9 +22,60 @@
>
> +%macro PAVGBP_MMX 6
> + mova %3, %1
> + mova %6, %4
> + por %3, %2
> + por %6, %5
> + pxor %2, %1
> + pxor %5, %4
> + pand %2, m6
> + pand %5, m6
> + psrlq %2, 1
> + psrlq %5, 1
> + psubb %3, %2
> + psubb %6, %5
> +%endmacro
> +
> +%macro PAVGBP_NO_RND_MMX 6
> + mova %3, %1
> + mova %6, %4
> + pand %3, %2
> + pand %6, %5
> + pxor %2, %1
> + pxor %5, %4
> + pand %2, m6
> + pand %5, m6
> + psrlq %2, 1
> + psrlq %5, 1
> + paddb %3, %2
> + paddb %6, %5
> +%endmacro
> +
> +%macro PAVGB_OP_MMX 4
> + mova %3, %1
> + por %3, %2
> + pxor %2, %1
> + pand %2, %4
> + psrlq %2, 1
> + psubb %3, %2
> +%endmacro
> +
> +%macro PAVGB_NRND_OP_MMX 4
> + mova %3, %1
> + pand %3, %2
> + pxor %2, %1
> + pand %2, %4
> + psrlq %2, 1
> + paddb %3, %2
> +%endmacro
nit: This would look prettier with arguments vertically aligned
across macro boundaries.
> @@ -56,6 +107,44 @@ PUT_PIXELS8_X2
>
> +%macro PUT_PIXELS8_X2_MMX 0-1
> +%if %0 == 1
> +cglobal put%1_pixels8_x2, 4,4
> +%else
> +cglobal put_pixels8_x2, 4,4
> +%endif
IIRC you don't need the %if, but you can just pass an empty
first parameter and it should do the right thing.
.. more below ..
> + lea r1, [r1+r2*2]
> + lea r0, [r0+r2*2]
> + sub r3d, 4
> + jne .loop
> + RET
Weird placement of .loop; I suggest aligning it with the rest.
Probably it is handled inconsistently throughout...
> @@ -453,6 +753,201 @@ AVG_PIXELS8_XY2
>
> +%macro AVG_PIXELS8_XY2_MMX 0-1
Some macros have comments with the C functions they implement, some
don't. Please add the comments everywhere, I consider them helpful.
Diego
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel