On Tue, May 24, 2011 at 9:25 AM, Ronald S. Bultje <[email protected]> wrote:
> Hi, > > On Tue, May 24, 2011 at 8:15 AM, Daniel Kang <[email protected]> > wrote: > > diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h > [..] > > @@ -66,7 +66,6 @@ typedef struct H264DSPContext{ > > void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM > *block/*align 16*/, int stride); > > void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM > *block/*align 16*/, int stride); > > > > - void (*h264_dct)(DCTELEM block[4][4]); > > void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int > *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t > nnzc[6*8]); > > void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int > *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t > nnzc[6*8]); > > void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int > *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t > nnzc[6*8]); > [..] > > diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm > [..] > > @@ -125,7 +125,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0 > > SUMSUB_BA w, 0, 4 > > SUMSUB_BA w, 3, 2 > > SUMSUB_BA w, 1, 5 > > - SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567 > > + SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567 > > %endmacro > > > > %macro IDCT8_1D_FULL 1 > [..] > > @@ -261,7 +261,7 @@ cglobal h264_idct8_add_sse2, 3, 4, 10 > > packuswb m1, m1 > > %endmacro > > > > -%macro DC_ADD_MMX2_OP 3-4 > > +%macro DC_ADD_MMX2_OP 4 > > %1 m2, [%2 ] > > %1 m3, [%2+%3 ] > > %1 m4, [%2+%3*2] > > All unrelated, but fine as a separate patch. > What is the best way to split this? I can't get "git gui" to unstage certain changes in the same file... 
> > > @@ -840,7 +840,7 @@ cglobal h264_idct_add16intra_sse2, 5, 7, 8 > > > > ; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, > > ; DCTELEM *block, int stride, const uint8_t > nnzc[6*8]) > > -cglobal h264_idct_add8_sse2, 5, 7, 8 > > +cglobal h264_idct_add8_8_sse2, 5, 7, 8 > > add r2, 512 > > %ifdef ARCH_X86_64 > > mov r10, r0 > > I'd recommend doing the 8bit to _8 renames in a separate patch also, > makes the actual patch a little smaller… > Can this stay in? Otherwise, I'll fix it once I learn how to unstage changes in parts of files. > > > diff --git a/libavcodec/x86/h264_idct_10bit.asm > b/libavcodec/x86/h264_idct_10bit.asm > [..] > > > +;----------------------------------------------------------------------------- > > +; void h264_idct_dc_add(pixel *dst, dctcoef *block, int stride) > > > +;----------------------------------------------------------------------------- > > +%macro IDCT_DC_ADD_OP_10 0 > > + mova m1, [r0+ 0] > > + mova m2, [r0+r2] > > + mova m3, [r1+ 0] > > + mova m4, [r1+r2] > > + pxor m5, m5 > > + paddw m1, m0 > > + paddw m2, m0 > > + paddw m3, m0 > > + paddw m4, m0 > > + CLIPW m1, m5, m6 > > + CLIPW m2, m5, m6 > > + CLIPW m3, m5, m6 > > + CLIPW m4, m5, m6 > > Strictly speaking, CLIPW is not a single instruction, so I'm assuming > that interleaving this (that's why STORE_DIFFx2 exists, for example) > in a CLIPWx2 or even CLIPWx4 instruction might help on Atoms… > I have no access to an Atom for testing. > > Rest looks pretty good, nice work! > Thanks! > > Ronald
_______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
