# HG changeset patch # User Praveen Tiwari # Date 1422867249 -19800 # Branch stable # Node ID 2618352a21d5917ee8c1f79bcc159e858dd19daa # Parent e2c958ff874e2bf8992ba22605e993530e8a2d8c blockfill_s_8x8 sse2 asm code optimization
improved, 100.04c -> 90.05c diff -r e2c958ff874e -r 2618352a21d5 source/common/x86/blockcopy8.asm --- a/source/common/x86/blockcopy8.asm Sat Jan 31 13:48:34 2015 -0600 +++ b/source/common/x86/blockcopy8.asm Mon Feb 02 14:24:09 2015 +0530 @@ -1748,9 +1748,10 @@ ; void blockfill_s_8x8(int16_t* dst, intptr_t dstride, int16_t val) ;----------------------------------------------------------------------------- INIT_XMM sse2 -cglobal blockfill_s_8x8, 3, 3, 1, dst, dstStride, val +cglobal blockfill_s_8x8, 3, 4, 1, dst, dstStride, val add r1, r1 +lea r3, [3 * r1] movd m0, r2d pshuflw m0, m0, 0 @@ -1760,17 +1761,13 @@ movu [r0 + r1], m0 movu [r0 + 2 * r1], m0 -lea r0, [r0 + 2 * r1] +movu [r0 + r3], m0 +movu [r0 + 4 * r1], m0 + +lea r0, [r0 + 4 * r1] movu [r0 + r1], m0 movu [r0 + 2 * r1], m0 - -lea r0, [r0 + 2 * r1] -movu [r0 + r1], m0 -movu [r0 + 2 * r1], m0 - -lea r0, [r0 + 2 * r1] -movu [r0 + r1], m0 - +movu [r0 + r3], m0 RET ;----------------------------------------------------------------------------- _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
