At 2013-11-22 16:08:54,[email protected] wrote: ># HG changeset patch ># User Murugan Vairavel <[email protected]> ># Date 1385107712 -19800 ># Fri Nov 22 13:38:32 2013 +0530 ># Node ID 596deb572c970cf407b7f18f9b07d122487530a6 ># Parent 7a576d46cc9067c937677d013d70f5ed9639107b >asm: code for sse_pp_24x32 routine > >diff -r 7a576d46cc90 -r 596deb572c97 source/common/x86/asm-primitives.cpp >--- a/source/common/x86/asm-primitives.cpp Fri Nov 22 13:20:33 2013 +0530 >+++ b/source/common/x86/asm-primitives.cpp Fri Nov 22 13:38:32 2013 +0530 >@@ -604,6 +604,7 @@ > SA8D_INTER_FROM_BLOCK(sse4); > > p.sse_pp[LUMA_12x16] = x265_pixel_ssd_12x16_sse4; >+ p.sse_pp[LUMA_24x32] = x265_pixel_ssd_24x32_sse4; > > CHROMA_PIXELSUB_PS(_sse4); > >diff -r 7a576d46cc90 -r 596deb572c97 source/common/x86/pixel-a.asm >--- a/source/common/x86/pixel-a.asm Fri Nov 22 13:20:33 2013 +0530 >+++ b/source/common/x86/pixel-a.asm Fri Nov 22 13:38:32 2013 +0530 >@@ -482,7 +482,6 @@ > SSD 32, 8 > SSD 8, 32 > SSD 32, 24 >-SSD 24, 32 > SSD 24, 24 ; not used, but resolves x265_pixel_ssd_24x24_sse2.startloop symbol > SSD 8, 4 > SSD 8, 8 >@@ -613,6 +612,73 @@ > RET > > ;----------------------------------------------------------------------------- >+; int pixel_ssd_24x32( uint8_t *, intptr_t, uint8_t *, intptr_t ) >+;----------------------------------------------------------------------------- >+INIT_XMM sse4 >+cglobal pixel_ssd_24x32, 4, 7, 8, src1, stride1, src2, stride2 >+ >+ pxor m7, m7 >+ pxor m6, m6 >+ mov r4d, 16 >+ >+.loop >+ movu m1, [r0] >+ pmovzxbw m0, m1 >+ punpckhbw m1, m6 >+ movh m2, [r0 + 16] >+ pmovzxbw m2, m2 >+ movu m4, [r2] >+ pmovzxbw m3, m4 >+ punpckhbw m4, m6 >+ movh m5, [r2 + 16] >+ pmovzxbw m5, m5 pmovzxbw does not need aligned access, so the preceding movh load can be merged into it >+ >+ psubw m0, m3 >+ psubw m1, m4 >+ psubw m2, m5 >+ >+ pmaddwd m0, m0 >+ pmaddwd m1, m1 >+ pmaddwd m2, m2 >+ >+ paddd m0, m1 >+ paddd m7, m2 >+ paddd m7, m0 >+ >+ movu m1, [r0 + r1] >+ pmovzxbw m0, m1 >+ punpckhbw m1, m6 >+ movh m2, [r0 + 
r1 + 16] >+ pmovzxbw m2, m2 >+ movu m4, [r2 + r3] >+ pmovzxbw m3, m4 >+ punpckhbw m4, m6 >+ movh m5, [r2 + r3 + 16] >+ pmovzxbw m5, m5 >+ >+ psubw m0, m3 >+ psubw m1, m4 >+ psubw m2, m5 >+ >+ pmaddwd m0, m0 >+ pmaddwd m1, m1 >+ pmaddwd m2, m2 >+ >+ paddd m0, m1 >+ paddd m7, m2 >+ paddd m7, m0 >+ >+ dec r4d >+ lea r0, [r0 + 2 * r1] >+ lea r2, [r2 + 2 * r3] >+ jnz .loop >+ >+ HADDD m7, m1 >+ movd eax, m7 >+ >+ RET >+ >+;----------------------------------------------------------------------------- > ; void pixel_ssd_nv12_core( uint16_t *pixuv1, intptr_t stride1, uint16_t > *pixuv2, intptr_t stride2, > ; int width, int height, uint64_t *ssd_u, uint64_t > *ssd_v ) > ; >diff -r 7a576d46cc90 -r 596deb572c97 source/common/x86/pixel.h >--- a/source/common/x86/pixel.h Fri Nov 22 13:20:33 2013 +0530 >+++ b/source/common/x86/pixel.h Fri Nov 22 13:38:32 2013 +0530 >@@ -373,5 +373,6 @@ > void x265_transpose32_sse2(pixel *dest, pixel *src, intptr_t stride); > void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride); > int x265_pixel_ssd_12x16_sse4(pixel *, intptr_t, pixel *, intptr_t); >+int x265_pixel_ssd_24x32_sse4(pixel *, intptr_t, pixel *, intptr_t); > > #endif // ifndef X265_I386_PIXEL_H >_______________________________________________ >x265-devel mailing list >[email protected] >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
