On Wed, Feb 13, 2013 at 05:53:36PM -0500, Daniel Kang wrote:
> 
> --- a/libavcodec/x86/cavsdsp.c
> +++ b/libavcodec/x86/cavsdsp.c
> @@ -475,12 +481,18 @@ CAVS_MC(put_, 8, 3dnow)
>  CAVS_MC(put_, 16,3dnow)
>  CAVS_MC(avg_, 8, 3dnow)
>  CAVS_MC(avg_, 16,3dnow)
> +#endif /* HAVE_AMD3DNOW_INLINE */
>  
>  static av_cold void ff_cavsdsp_init_3dnow(CAVSDSPContext *c,
>                                            AVCodecContext *avctx)
>  {
> +#if HAVE_YASM
> +    c->put_cavs_qpel_pixels_tab[0][0] = ff_put_cavs_qpel16_mc00_mmxext;
> +    c->put_cavs_qpel_pixels_tab[1][0] = ff_put_cavs_qpel16_mc00_mmxext;
> +#endif
>
> +#if HAVE_INLINE_ASM
>  #define dspfunc(PFX, IDX, NUM) \
> -    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \
>      c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
>      c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \

mmxext functions in the 3dnow init function?

> --- a/libavcodec/x86/dsputil_mmx.c
> +++ b/libavcodec/x86/dsputil_mmx.c
> @@ -128,26 +136,45 @@ void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t 
> *block,
>  void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t 
> line_size, int h);
>  static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
> -                                   int line_size, int h)
> +                                   ptrdiff_t line_size, int h)

Is there a reason not to do this separately, i.e. right away?

> @@ -1291,31 +1176,29 @@ void ff_put_pixels16_sse2(uint8_t *block, const 
> uint8_t *pixels,
>  
> -#if HAVE_INLINE_ASM
> +#if HAVE_YASM
>  
>  /* CAVS-specific */
>  void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
>  {
> -    put_pixels8_mmx(dst, src, stride, 8);
> +    ff_put_pixels8_mmx(dst, src, stride, 8);
>  }
>  
>  void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
>  {
> -    avg_pixels8_mmx(dst, src, stride, 8);
> +    ff_avg_pixels8_mmx(dst, src, stride, 8);
>  }
>  
>  void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
>  {
> -    put_pixels16_mmx(dst, src, stride, 16);
> +    ff_put_pixels16_mmx(dst, src, stride, 16);
>  }
>  
>  void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
>  {
> -    avg_pixels16_mmx(dst, src, stride, 16);
> +    ff_avg_pixels16_mmx(dst, src, stride, 16);
>  }
> -#endif /* HAVE_INLINE_ASM */

NOTE (to self): Now that this no longer relies on static functions,
it can be moved out of the dsputil_mmx file into the codec-specific
init files.

> --- a/libavcodec/x86/dsputil_rnd_template.c
> +++ b/libavcodec/x86/dsputil_rnd_template.c
> @@ -25,570 +25,28 @@
>  
>  //FIXME optimize
> -static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, 
> ptrdiff_t line_size, int h){
> -    DEF(put, pixels8_y2)(block  , pixels  , line_size, h);
> -    DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
> +static void DEF(ff_put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, 
> ptrdiff_t line_size, int h){
> +    DEF(ff_put, pixels8_y2)(block  , pixels  , line_size, h);
> +    DEF(ff_put, pixels8_y2)(block+8, pixels+8, line_size, h);
>  }

Is the FIXME comment still valid in some way?

Please prettyprint those lines that you are changing anyway, I think
I gave you an sed expression to do it automatically the last time.
It should still work and/or be easy to adopt.

> --- a/libavcodec/x86/hpeldsp.asm
> +++ b/libavcodec/x86/hpeldsp.asm
> @@ -22,9 +22,60 @@
>  
> +%macro PAVGBP_MMX 6
> +    mova   %3, %1
> +    mova   %6, %4
> +    por    %3, %2
> +    por    %6, %5
> +    pxor   %2, %1
> +    pxor   %5, %4
> +    pand   %2, m6
> +    pand   %5, m6
> +    psrlq  %2, 1
> +    psrlq  %5, 1
> +    psubb  %3, %2
> +    psubb  %6, %5
> +%endmacro
> +
> +%macro PAVGBP_NO_RND_MMX 6
> +    mova         %3, %1
> +    mova         %6, %4
> +    pand         %3, %2
> +    pand         %6, %5
> +    pxor         %2, %1
> +    pxor         %5, %4
> +    pand         %2, m6
> +    pand         %5, m6
> +    psrlq        %2, 1
> +    psrlq        %5, 1
> +    paddb        %3, %2
> +    paddb        %6, %5
> +%endmacro
> +
> +%macro PAVGB_OP_MMX 4
> +    mova         %3, %1
> +    por          %3, %2
> +    pxor         %2, %1
> +    pand         %2, %4
> +    psrlq        %2, 1
> +    psubb        %3, %2
> +%endmacro
> +
> +%macro PAVGB_NRND_OP_MMX 4
> +    mova   %3, %1
> +    pand   %3, %2
> +    pxor   %2, %1
> +    pand   %2, %4
> +    psrlq  %2, 1
> +    paddb  %3, %2
> +%endmacro

nit: This would look prettier with arguments vertically aligned
across macro boundaries.

> @@ -56,6 +107,44 @@ PUT_PIXELS8_X2
>  
> +%macro PUT_PIXELS8_X2_MMX 0-1
> +%if %0 == 1
> +cglobal put%1_pixels8_x2, 4,4
> +%else
> +cglobal put_pixels8_x2, 4,4
> +%endif

IIRC you don't need the %if, but you can just pass an empty
first parameter and it should do the right thing.
.. more below ..

> +    lea          r1, [r1+r2*2]
> +    lea          r0, [r0+r2*2]
> +    sub         r3d, 4
> +    jne .loop
> +    RET

Weird placement of .loop; I suggest aligning it with the rest.
Probably it is handled inconsistently throughout...

> @@ -453,6 +753,201 @@ AVG_PIXELS8_XY2
>  
> +%macro AVG_PIXELS8_XY2_MMX 0-1

Some macros have comments with the C functions they implement, some
don't.  Please add the comments everywhere, I consider them helpful.

Diego
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to