# HG changeset patch
# User Praveen Tiwari
# Date 1383833451 -19800
# Node ID fed3fbe5e9f1942da657957821a5d1bb396f3d37
# Parent b6f08fc7aef3141e2049701515d23169ef6e232d
asm code for blockfil_s, 16x16

diff -r b6f08fc7aef3 -r fed3fbe5e9f1 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Nov 07 18:59:28 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Nov 07 19:40:51 2013 +0530
@@ -364,6 +364,7 @@
 
         p.blockfill_s[BLOCK_4x4] = x265_blockfil_s_4x4_sse2;
         p.blockfill_s[BLOCK_8x8] = x265_blockfil_s_8x8_sse2;
+        p.blockfill_s[BLOCK_16x16] = x265_blockfil_s_16x16_sse2;
 #if X86_64
         p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
         p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
diff -r b6f08fc7aef3 -r fed3fbe5e9f1 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Thu Nov 07 18:59:28 2013 +0530
+++ b/source/common/x86/blockcopy8.asm Thu Nov 07 19:40:51 2013 +0530
@@ -1694,3 +1694,63 @@
 movu [r0 + r1], m0
 
 RET
+
+;-----------------------------------------------------------------------------
+; void blockfil_s_%1x%2(int16_t *dest, intptr_t destStride, int16_t val)
+;
+; Stores val into every element of a block of int16_t at dest: %2 rows of
+; 16 elements (two 16-byte stores per row), rows destStride elements apart.
+; %1 only appears in the symbol name. %2 must be a positive multiple of 8,
+; because the loop writes 8 rows per pass and counts down by 8 to exactly 0.
+; Registers: r0 = dest, r1 = destStride (in bytes after doubling), r2 = val,
+;            r3 = rows remaining, r4 = scratch row pointer, m0 = val x 8.
+;-----------------------------------------------------------------------------
+%macro BLOCKFIL_S_W16_H8 2
+INIT_XMM sse2
+cglobal blockfil_s_%1x%2, 3, 5, 1, dest, destStride, val
+
+mov     r3d, %2                         ; r3d = number of rows still to fill
+
+add     r1, r1                          ; stride: int16_t elements -> bytes
+
+movd    m0, r2d                         ; low word of m0 = val
+pshuflw m0, m0, 0                       ; replicate it across the low 4 words
+pshufd  m0, m0, 0                       ; ...and across all 8 words of m0
+
+.loop:
+    movu    [r0], m0                    ; row 0, elements 0-7
+    movu    [r0 + 16], m0               ; row 0, elements 8-15
+
+    movu    [r0 + r1], m0               ; row 1
+    movu    [r0 + r1 + 16], m0
+
+    movu    [r0 + 2 * r1], m0           ; row 2
+    movu    [r0 + 2 * r1 + 16], m0
+
+    lea     r4, [r0 + 2 * r1]           ; r4 = row 2 (x3 is not an encodable scale)
+    movu    [r4 + r1], m0               ; row 3
+    movu    [r4 + r1 + 16], m0
+
+    movu    [r0 + 4 * r1], m0           ; row 4
+    movu    [r0 + 4 * r1 + 16], m0
+
+    lea     r4, [r0 + 4 * r1]           ; r4 = row 4
+    movu    [r4 + r1], m0               ; row 5
+    movu    [r4 + r1 + 16], m0
+
+    movu    [r4 + 2 * r1], m0           ; row 6
+    movu    [r4 + 2 * r1 + 16], m0
+
+    lea     r4, [r4 + 2 * r1]           ; r4 = row 6
+    movu    [r4 + r1], m0               ; row 7
+    movu    [r4 + r1 + 16], m0
+
+    lea     r0, [r0 + 8 * r1]           ; advance dest past the 8 rows just written
+
+    sub     r3d, 8                      ; 8 fewer rows to go
+    jnz     .loop                       ; repeat until the count reaches exactly 0
+
+RET
+%endmacro
+
+BLOCKFIL_S_W16_H8 16, 16
diff -r b6f08fc7aef3 -r fed3fbe5e9f1 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Thu Nov 07 18:59:28 2013 +0530
+++ b/source/common/x86/pixel.h Thu Nov 07 19:40:51 2013 +0530
@@ -268,6 +268,7 @@
 
 void x265_blockfil_s_4x4_sse2(int16_t *dst, intptr_t dstride, int16_t val);
 void x265_blockfil_s_8x8_sse2(int16_t *dst, intptr_t dstride, int16_t val);
+void x265_blockfil_s_16x16_sse2(int16_t *dst, intptr_t dstride, int16_t val);
 
 #undef DECL_PIXELS
 #undef DECL_SUF
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel