>+;----------------------------------------------------------------------------- >+; void pixel_add_ps_%1x%2(pixel *dest, int destride, pixel *src0, int16_t >*scr1, int srcStride0, int srcStride1) >+;----------------------------------------------------------------------------- Using the intptr_t type for the stride parameters would be better. >+%macro PIXEL_ADD_PS_W4_H4 2 >+INIT_XMM sse4 >+cglobal pixel_add_ps_%1x%2, 6, 7, 2, dest, destride, src0, scr1, srcStride0, >srcStride1 >+ >+add r5, r5 >+ >+mov r6d, %2/4 >+ >+.loop >+ movd m0, [r2] >+ pmovzxbw m0, m0 >+ movh m1, [r3] >+ >+ paddw m0, m1 >+ packuswb m0, m0 >+ >+ movd [r0], m0 >+ >+ movd m0, [r2 + r4] >+ pmovzxbw m0, m0 >+ movh m1, [r3 + r5] >+ >+ paddw m0, m1 >+ packuswb m0, m0 >+ >+ movd [r0 + r1], m0 >+ >+ movd m0, [r2 + 2 * r4] >+ pmovzxbw m0, m0 >+ movh m1, [r3 + 2 * r5] >+ >+ paddw m0, m1 >+ packuswb m0, m0 >+ >+ movd [r0 + 2 * r1], m0 >+ >+ lea r0, [r0 + 2 * r1] >+ lea r2, [r2 + 2 * r4] >+ lea r3, [r3 + 2 * r5] >+ >+ movd m0, [r2 + r4] >+ pmovzxbw m0, m0 >+ movh m1, [r3 + r5] >+ >+ paddw m0, m1 >+ packuswb m0, m0 >+ >+ movd [r0 + r1], m0 >+ >+ lea r0, [r0 + 2 * r1] >+ lea r2, [r2 + 2 * r4] >+ lea r3, [r3 + 2 * r5] >+ >+ dec r6d >+ jnz .loop >+ >+RET >+%endmacro >+ >+PIXEL_ADD_PS_W4_H4 4, 8 >+PIXEL_ADD_PS_W4_H4 4, 16 >_______________________________________________ >x265-devel mailing list >[email protected] >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
