Excuse me, I pressed the button too early. The code is good, but you need to insert some spaces before RET and remove the unused blank line.
At 2013-11-18 22:24:12,[email protected] wrote: ># HG changeset patch ># User Murugan Vairavel <[email protected]> ># Date 1384784621 -19800 ># Mon Nov 18 19:53:41 2013 +0530 ># Node ID d24c22e915afd33a122326516b41eecf7e055934 ># Parent a4735d0fe4759c72a3af408a43723f219688eeb4 >asm: code for transpose4x4 routine > >diff -r a4735d0fe475 -r d24c22e915af source/common/x86/asm-primitives.cpp >--- a/source/common/x86/asm-primitives.cpp Mon Nov 18 18:59:20 2013 +0530 >+++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 19:53:41 2013 +0530 >@@ -545,6 +545,7 @@ > p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2; > p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2; > p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2; >+ p.transpose[BLOCK_4x4] = x265_transpose4_sse2; > } > if (cpuMask & X265_CPU_SSSE3) > { >diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel-a.asm >--- a/source/common/x86/pixel-a.asm Mon Nov 18 18:59:20 2013 +0530 >+++ b/source/common/x86/pixel-a.asm Mon Nov 18 19:53:41 2013 +0530 >@@ -8340,3 +8340,25 @@ > jnz .loop > > RET >+ >+;----------------------------------------------------------------- >+; void transpose_4x4(pixel *dst, pixel *src, intptr_t stride) >+;----------------------------------------------------------------- >+INIT_XMM sse2 >+cglobal transpose4, 3, 3, 4, dest, src, stride >+ >+ movd m0, [r1] >+ movd m1, [r1 + r2] >+ movd m2, [r1 + 2 * r2] >+ >+ lea r1, [r1 + 2 * r2] >+ >+ movd m3, [r1 + r2] >+ >+ punpcklbw m0, m1 >+ punpcklbw m2, m3 >+ punpcklwd m0, m2 >+ >+ movu [r0], m0 >+ >+RET >diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel.h >--- a/source/common/x86/pixel.h Mon Nov 18 18:59:20 2013 +0530 >+++ b/source/common/x86/pixel.h Mon Nov 18 19:53:41 2013 +0530 >@@ -365,5 +365,6 @@ > void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, > intptr_t stride); > void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, > intptr_t stride); > void x265_getResidual32_sse4(pixel *fenc, pixel 
*pred, int16_t *residual, > intptr_t stride); >+void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride); > > #endif // ifndef X265_I386_PIXEL_H >_______________________________________________ >x265-devel mailing list >[email protected] >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
