I think this function has a problem, and I am not sure whether it was verified with the testbench before being uploaded. The problem is that "j" and "l" each miss a pixel, so in some cases the function produces an incorrect result.
At 2013-11-18 15:06:07,[email protected] wrote: ># HG changeset patch ># User Murugan Vairavel <[email protected]> ># Date 1384757077 -19800 ># Mon Nov 18 12:14:37 2013 +0530 ># Node ID d756003f63691b7677b4cf4c98fbb2a1d67dbb02 ># Parent e2895ce7bbeb2c3d845fee2578758d0012fa2cb4 >asm: code for scale2D_64to32 routine > >diff -r e2895ce7bbeb -r d756003f6369 source/common/x86/asm-primitives.cpp >--- a/source/common/x86/asm-primitives.cpp Sun Nov 17 11:24:13 2013 -0600 >+++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 12:14:37 2013 +0530 >@@ -529,6 +529,7 @@ > PIXEL_AVG_W4(ssse3); > > p.scale1D_128to64 = x265_scale1D_128to64_ssse3; >+ p.scale2D_64to32 = x265_scale2D_64to32_ssse3; > > p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3; > p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3; >diff -r e2895ce7bbeb -r d756003f6369 source/common/x86/pixel-a.asm >--- a/source/common/x86/pixel-a.asm Sun Nov 17 11:24:13 2013 -0600 >+++ b/source/common/x86/pixel-a.asm Mon Nov 18 12:14:37 2013 +0530 >@@ -8230,3 +8230,113 @@ > movu [r0 + 48], m4 > > RET >+ >+;----------------------------------------------------------------- >+; void scale2D_64to32(pixel *dst, pixel *src, intptr_t stride) >+;----------------------------------------------------------------- >+INIT_XMM ssse3 >+cglobal scale2D_64to32, 3, 4, 8, dest, src, stride >+ >+ mova m7, [deinterleave_shuf] >+ mov r3d, 32 >+.loop >+ >+ movu m0, [r1] ;i >+ palignr m1, m0, 1 ;j >+ movu m2, [r1 + r2] ;k >+ palignr m3, m2, 1 ;l >+ movu m4, m0 >+ movu m5, m2 >+ >+ pxor m4, m1 ;i^j >+ pxor m5, m3 ;k^l >+ por m4, m5 ;ij|kl >+ >+ pavgb m0, m1 ;s >+ pavgb m2, m3 ;t >+ movu m5, m0 >+ pavgb m0, m2 ;(s+t+1)/2 >+ pxor m5, m2 ;s^t >+ pand m4, m5 ;(ij|kl)&st >+ pand m4, [hmul_16p] >+ psubb m0, m4 ;Result >+ >+ movu m1, [r1 + 16] ;i >+ palignr m2, m1, 1 ;j >+ movu m3, [r1 + r2 + 16] ;k >+ palignr m4, m3, 1 ;l >+ movu m5, m1 >+ movu m6, m3 >+ >+ pxor m5, m2 ;i^j >+ pxor m6, m4 ;k^l >+ por m5, m6 ;ij|kl >+ >+ pavgb m1, m2 ;s >+ pavgb m3, m4 ;t >+ 
movu m6, m1 >+ pavgb m1, m3 ;(s+t+1)/2 >+ pxor m6, m3 ;s^t >+ pand m5, m6 ;(ij|kl)&st >+ pand m5, [hmul_16p] >+ psubb m1, m5 ;Result >+ >+ pshufb m0, m0, m7 >+ pshufb m1, m1, m7 >+ >+ punpcklqdq m0, m1 >+ movu [r0], m0 >+ >+ movu m0, [r1 + 32] ;i >+ palignr m1, m0, 1 ;j >+ movu m2, [r1 + r2 + 32] ;k >+ palignr m3, m2, 1 ;l >+ movu m4, m0 >+ movu m5, m2 >+ >+ pxor m4, m1 ;i^j >+ pxor m5, m3 ;k^l >+ por m4, m5 ;ij|kl >+ >+ pavgb m0, m1 ;s >+ pavgb m2, m3 ;t >+ movu m5, m0 >+ pavgb m0, m2 ;(s+t+1)/2 >+ pxor m5, m2 ;s^t >+ pand m4, m5 ;(ij|kl)&st >+ pand m4, [hmul_16p] >+ psubb m0, m4 ;Result >+ >+ movu m1, [r1 + 48] ;i >+ palignr m2, m1, 1 ;j >+ movu m3, [r1 + r2 + 48] ;k >+ palignr m4, m3, 1 ;l >+ movu m5, m1 >+ movu m6, m3 >+ >+ pxor m5, m2 ;i^j >+ pxor m6, m4 ;k^l >+ por m5, m6 ;ij|kl >+ >+ pavgb m1, m2 ;s >+ pavgb m3, m4 ;t >+ movu m6, m1 >+ pavgb m1, m3 ;(s+t+1)/2 >+ pxor m6, m3 ;s^t >+ pand m5, m6 ;(ij|kl)&st >+ pand m5, [hmul_16p] >+ psubb m1, m5 ;Result >+ >+ pshufb m0, m0, m7 >+ pshufb m1, m1, m7 >+ >+ punpcklqdq m0, m1 >+ movu [r0 + 16], m0 >+ >+ lea r0, [r0 + 32] >+ lea r1, [r1 + 2 * r2] >+ dec r3d >+ >+ jnz .loop >+ >+RET >diff -r e2895ce7bbeb -r d756003f6369 source/common/x86/pixel.h >--- a/source/common/x86/pixel.h Sun Nov 17 11:24:13 2013 -0600 >+++ b/source/common/x86/pixel.h Mon Nov 18 12:14:37 2013 +0530 >@@ -117,6 +117,7 @@ > int x265_pixel_satd_16x32_sse2(pixel *, intptr_t, pixel *, intptr_t); > int x265_pixel_satd_16x64_sse2(pixel *, intptr_t, pixel *, intptr_t); > void x265_scale1D_128to64_ssse3(pixel *, pixel *, intptr_t); >+void x265_scale2D_64to32_ssse3(pixel *, pixel *, intptr_t); > > DECL_PIXELS(uint64_t, var, mmx2, (pixel * pix, intptr_t i_stride)) > DECL_PIXELS(uint64_t, var, sse2, (pixel * pix, intptr_t i_stride)) >_______________________________________________ >x265-devel mailing list >[email protected] >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
