# HG changeset patch # User Murugan Vairavel <muru...@multicorewareinc.com> # Date 1386162865 -19800 # Wed Dec 04 18:44:25 2013 +0530 # Node ID a525d3fde24f8c076def1b67122e6f7f69e60d35 # Parent 4347192eae502a5f963d7e79655ba753e677b58b asm: 10bpp code for scale1D_128to64 module
diff -r 4347192eae50 -r a525d3fde24f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Dec 04 17:11:43 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Dec 04 18:44:25 2013 +0530
@@ -576,6 +576,7 @@
     }
     if (cpuMask & X265_CPU_SSSE3)
     {
+        p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
     }
     if (cpuMask & X265_CPU_SSE4)
     {
diff -r 4347192eae50 -r a525d3fde24f source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Wed Dec 04 17:11:43 2013 +0530
+++ b/source/common/x86/pixel-util8.asm	Wed Dec 04 18:44:25 2013 +0530
@@ -44,6 +44,7 @@
 mask_ff:                times 16 db 0xff
                         times 16 db 0
 deinterleave_shuf: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
+deinterleave_word_shuf: db 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 ; even words -> low qword, odd words -> high qword
 hmul_16p:  times 16 db 1
            times 8 db 1, -1
 
@@ -1643,7 +1644,98 @@
 ;-----------------------------------------------------------------
 INIT_XMM ssse3
 cglobal scale1D_128to64, 2, 2, 8, dest, src1, stride
-
+%if HIGH_BIT_DEPTH
+    ; HIGH_BIT_DEPTH path: samples are 16-bit words. Each pass loads four
+    ; 16-byte vectors, pairs every word with its right-hand neighbour (palignr
+    ; by 2 bytes) and takes the rounded average (pavgw). pshufb with m7 then
+    ; gathers the even-indexed averages into the low qword, the only half kept
+    ; by punpcklqdq, so the never-initialised top palignr lane is discarded.
+    mova        m7, [deinterleave_word_shuf]
+    movu        m0, [r1]                ; pass 1: input words 0..31 -> output words 0..15
+    palignr     m1, m0, 2               ; m1 = m0 shifted down by one word
+    movu        m2, [r1 + 16]
+    palignr     m3, m2, 2
+    movu        m4, [r1 + 32]
+    palignr     m5, m4, 2
+    movu        m6, [r1 + 48]
+    pavgw       m0, m1
+    palignr     m1, m6, 2               ; m1 is free again after the first pavgw
+    pavgw       m2, m3
+    pavgw       m4, m5
+    pavgw       m6, m1
+    pshufb      m0, m0, m7
+    pshufb      m2, m2, m7
+    pshufb      m4, m4, m7
+    pshufb      m6, m6, m7
+    punpcklqdq  m0, m2                  ; 4 + 4 averaged words
+    movu        [r0], m0
+    punpcklqdq  m4, m6
+    movu        [r0 + 16], m4
+
+    movu        m0, [r1 + 64]           ; pass 2: input words 32..63 -> output words 16..31
+    palignr     m1, m0, 2
+    movu        m2, [r1 + 80]
+    palignr     m3, m2, 2
+    movu        m4, [r1 + 96]
+    palignr     m5, m4, 2
+    movu        m6, [r1 + 112]
+    pavgw       m0, m1
+    palignr     m1, m6, 2
+    pavgw       m2, m3
+    pavgw       m4, m5
+    pavgw       m6, m1
+    pshufb      m0, m0, m7
+    pshufb      m2, m2, m7
+    pshufb      m4, m4, m7
+    pshufb      m6, m6, m7
+    punpcklqdq  m0, m2
+    movu        [r0 + 32], m0
+    punpcklqdq  m4, m6
+    movu        [r0 + 48], m4
+
+    movu        m0, [r1 + 128]          ; pass 3: input words 64..95 -> output words 32..47
+    palignr     m1, m0, 2
+    movu        m2, [r1 + 144]
+    palignr     m3, m2, 2
+    movu        m4, [r1 + 160]
+    palignr     m5, m4, 2
+    movu        m6, [r1 + 176]
+    pavgw       m0, m1
+    palignr     m1, m6, 2
+    pavgw       m2, m3
+    pavgw       m4, m5
+    pavgw       m6, m1
+    pshufb      m0, m0, m7
+    pshufb      m2, m2, m7
+    pshufb      m4, m4, m7
+    pshufb      m6, m6, m7
+    punpcklqdq  m0, m2
+    movu        [r0 + 64], m0
+    punpcklqdq  m4, m6
+    movu        [r0 + 80], m4
+
+    movu        m0, [r1 + 192]          ; pass 4: input words 96..127 -> output words 48..63
+    palignr     m1, m0, 2
+    movu        m2, [r1 + 208]
+    palignr     m3, m2, 2
+    movu        m4, [r1 + 224]
+    palignr     m5, m4, 2
+    movu        m6, [r1 + 240]
+    pavgw       m0, m1
+    palignr     m1, m6, 2
+    pavgw       m2, m3
+    pavgw       m4, m5
+    pavgw       m6, m1
+    pshufb      m0, m0, m7
+    pshufb      m2, m2, m7
+    pshufb      m4, m4, m7
+    pshufb      m6, m6, m7
+    punpcklqdq  m0, m2
+    movu        [r0 + 96], m0
+    punpcklqdq  m4, m6
+    movu        [r0 + 112], m4
+
+%else
     mova        m7, [deinterleave_shuf]
 
     movu        m0, [r1]
@@ -1697,7 +1789,7 @@
     movu        [r0 + 32], m0
     punpcklqdq  m4, m6
     movu        [r0 + 48], m4
-
+%endif
     RET
 
 ;-----------------------------------------------------------------
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel