right
At 2015-02-03 20:25:48,[email protected] wrote:
># HG changeset patch
># User Praveen Tiwari
># Date 1422966338 -19800
># Node ID 3fc854e9e1b07e490c1422635dffea7b62e911c9
># Parent bfc9a2d99e20568cb43d9fba0133735009793b00
>blockcopy_pp_8x12: sse2 asm code optimization
>
>improved, 235.05c -> 158.79c
>
>diff -r bfc9a2d99e20 -r 3fc854e9e1b0 source/common/x86/blockcopy8.asm
>--- a/source/common/x86/blockcopy8.asm Tue Feb 03 17:14:55 2015 +0530
>+++ b/source/common/x86/blockcopy8.asm Tue Feb 03 17:55:38 2015 +0530
>@@ -351,17 +351,34 @@
> ; void blockcopy_pp_8x12(pixel* dst, intptr_t dstStride, const pixel* src,
> intptr_t srcStride)
> ;-----------------------------------------------------------------------------
> INIT_XMM sse2
>-cglobal blockcopy_pp_8x12, 4, 5, 2
>- mov r4d, 12/2
>-.loop:
>- movh m0, [r2]
>- movh m1, [r2 + r3]
>- movh [r0], m0
>- movh [r0 + r1], m1
>- dec r4d
>- lea r0, [r0 + 2 * r1]
>- lea r2, [r2 + 2 * r3]
>- jnz .loop
>+cglobal blockcopy_pp_8x12, 4, 5, 4
>+
>+ lea r4, [3 * r3]
>+ lea r5, [3 * r1]
>+
>+ movh m0, [r2]
>+ movh m1, [r2 + r3]
>+ movh m2, [r2 + 2 * r3]
>+ movh m3, [r2 + r4]
>+
>+ movh [r0], m0
>+ movh [r0 + r1], m1
>+ movh [r0 + 2 * r1], m2
>+ movh [r0 + r5], m3
>+
>+ %rep 2
>+ lea r2, [r2 + 4 * r3]
>+ movh m0, [r2]
>+ movh m1, [r2 + r3]
>+ movh m2, [r2 + 2 * r3]
>+ movh m3, [r2 + r4]
>+
>+ lea r0, [r0 + 4 * r1]
>+ movh [r0], m0
>+ movh [r0 + r1], m1
>+ movh [r0 + 2 * r1], m2
>+ movh [r0 + r5], m3
>+ %endrep
> RET
>
> ;-----------------------------------------------------------------------------
>_______________________________________________
>x265-devel mailing list
>[email protected]
>https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel
