I mistyped one partition size: instead of 8x6 it should be 8x8; the rest are correct.
Regards, Praveen Tiwari On Mon, Nov 11, 2013 at 2:58 PM, <[email protected]> wrote: > # HG changeset patch > # User Praveen Tiwari > # Date 1384162089 -19800 > # Node ID 6da0a0291ed8d10dc3dfdb3df396cd1a8c74ceeb > # Parent da0b44e67fe07caa7ed113ec4946a371d96801be > asm code for blockcopy_ps, 8x6, 8x16 and 8x32 > > diff -r da0b44e67fe0 -r 6da0a0291ed8 source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Mon Nov 11 14:36:21 2013 > +0530 > +++ b/source/common/x86/asm-primitives.cpp Mon Nov 11 14:58:09 2013 > +0530 > @@ -459,6 +459,9 @@ > p.chroma_copy_ps[CHROMA_8x2] = x265_blockcopy_ps_8x2_sse4; > p.chroma_copy_ps[CHROMA_8x4] = x265_blockcopy_ps_8x4_sse4; > p.chroma_copy_ps[CHROMA_8x6] = x265_blockcopy_ps_8x6_sse4; > + p.chroma_copy_ps[CHROMA_8x8] = x265_blockcopy_ps_8x8_sse4; > + p.chroma_copy_ps[CHROMA_8x16] = x265_blockcopy_ps_8x16_sse4; > + p.chroma_copy_ps[CHROMA_8x32] = x265_blockcopy_ps_8x32_sse4; > } > if (cpuMask & X265_CPU_AVX) > { > diff -r da0b44e67fe0 -r 6da0a0291ed8 source/common/x86/blockcopy8.asm > --- a/source/common/x86/blockcopy8.asm Mon Nov 11 14:36:21 2013 +0530 > +++ b/source/common/x86/blockcopy8.asm Mon Nov 11 14:58:09 2013 +0530 > @@ -1743,3 +1743,46 @@ > movu [r0 + r1], m0 > > RET > + > > +;----------------------------------------------------------------------------- > +; void blockcopy_ps_%1x%2(int16_t *dest, intptr_t destStride, pixel *src, > intptr_t srcStride); > > +;----------------------------------------------------------------------------- > +%macro BLOCKCOPY_PS_W8_H4 2 > +INIT_XMM sse4 > +cglobal blockcopy_ps_%1x%2, 4, 5, 1, dest, destStride, src, srcStride > + > +add r1, r1 > +mov r4d, %2/4 > + > +.loop > + movh m0, [r2] > + pmovzxbw m0, m0 > + movu [r0], m0 > + > + movh m0, [r2 + r3] > + pmovzxbw m0, m0 > + movu [r0 + r1], m0 > + > + movh m0, [r2 + 2 * r3] > + pmovzxbw m0, m0 > + movu [r0 + 2 * r1], m0 > + > + lea r2, [r2 + 2 * r3] > + lea r0, [r0 + 2 * r1] > + > + movh m0, [r2 + r3] > + pmovzxbw m0, m0 
> + movu [r0 + r1], m0 > + > + lea r0, [r0 + 2 * r1] > + lea r2, [r2 + 2 * r3] > + > + dec r4d > + jnz .loop > + > +RET > +%endmacro > + > +BLOCKCOPY_PS_W8_H4 8, 8 > +BLOCKCOPY_PS_W8_H4 8, 16 > +BLOCKCOPY_PS_W8_H4 8, 32 > diff -r da0b44e67fe0 -r 6da0a0291ed8 source/common/x86/blockcopy8.h > --- a/source/common/x86/blockcopy8.h Mon Nov 11 14:36:21 2013 +0530 > +++ b/source/common/x86/blockcopy8.h Mon Nov 11 14:58:09 2013 +0530 > @@ -96,7 +96,10 @@ > #define CHROMA_BLOCKCOPY_DEF_SSE4(cpu) \ > SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 2, cpu); \ > SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 4, cpu); \ > - SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 6, cpu); > + SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 6, cpu); \ > + SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 8, cpu); \ > + SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 16, cpu); \ > + SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 32, cpu); > > CHROMA_BLOCKCOPY_DEF_SSE4(_sse4); > >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
