# HG changeset patch
# User Praveen Tiwari
# Date 1382540461 -19800
# Node ID dccd8e5e1850d1885a8cd321926725542e1afe22
# Parent e4debc47482e921231de8928daf575a933899c29
asm code for interp_4tap_vert_pp_8x8

diff -r e4debc47482e -r dccd8e5e1850 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Wed Oct 23 20:21:41 2013 +0530
+++ b/source/common/x86/ipfilter8.asm Wed Oct 23 20:31:01 2013 +0530
@@ -785,3 +785,109 @@
     FILTER_V8_W8_8x6 8, 6
 
     RET
+
+;-----------------------------------------------------------------------------
+; void interp_4tap_vert_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;
+; %1 = block width; used only in the symbol name, every row is one 8-byte load/store
+; %2 = block height; must be a multiple of 4 because each loop pass emits 4 rows
+;
+; Register roles (x86inc names):
+;   r0     = src, moved back one row so the first tap reads the row above the block
+;   r1     = srcStride    r2 = dst    r3 = dstStride
+;   r4     = coeffIdx on entry, afterwards the number of rows still to produce
+;   r5, r6 = scratch addresses (r6 also holds the coefficient table base under PIC)
+;   m4     = constant from tab_c_512, the pmulhrsw multiplier
+;   m5, m6 = filter taps shuffled through tab_Vm + 16 / tab_Vm (presumably the
+;            (c2,c3) and (c0,c1) byte pairs; tab_Vm is defined outside this hunk)
+;   m0-m3  = source rows, m7 = temporary
+;-----------------------------------------------------------------------------
+%macro FILTER_V8_W8_H8_H16_H32 2
+%if (%2) & 3
+    %error interp_4tap_vert_pp: block height must be a multiple of 4
+%endif
+INIT_XMM sse4
+cglobal interp_4tap_vert_pp_%1x%2, 4, 7, 8
+
+    mov         r4d, r4m                ; coeffIdx (fifth argument)
+    sub         r0, r1                  ; start one row above the block
+
+%ifdef PIC
+    lea         r6, [tab_ChromaCoeff]
+    movd        m5, [r6 + r4 * 4]       ; 4 bytes of coefficients per index
+%else
+    movd        m5, [tab_ChromaCoeff + r4 * 4]
+%endif
+
+    pshufb      m6, m5, [tab_Vm]        ; multiplier for the first row pair
+    pshufb      m5, [tab_Vm + 16]       ; multiplier for the second row pair
+    mova        m4, [tab_c_512]
+
+    mov         r4d, %2                 ; rows left to produce
+
+.loop:
+    movq        m0, [r0]                ; row -1
+    movq        m1, [r0 + r1]           ; row  0
+    movq        m2, [r0 + 2 * r1]       ; row  1
+    lea         r5, [r0 + 2 * r1]
+    movq        m3, [r5 + r1]           ; row  2
+
+    ; output row 0 <- rows -1, 0, 1, 2
+    punpcklbw   m0, m1
+    punpcklbw   m7, m2, m3
+    pmaddubsw   m0, m6
+    pmaddubsw   m7, m5
+    paddw       m0, m7
+    pmulhrsw    m0, m4
+    packuswb    m0, m0
+    movh        [r2], m0
+
+    movq        m0, [r0 + 4 * r1]       ; row  3
+
+    ; output row 1 <- rows 0, 1, 2, 3
+    punpcklbw   m1, m2
+    punpcklbw   m7, m3, m0
+    pmaddubsw   m1, m6
+    pmaddubsw   m7, m5
+    paddw       m1, m7
+    pmulhrsw    m1, m4
+    packuswb    m1, m1
+    movh        [r2 + r3], m1
+
+    lea         r6, [r0 + 4 * r1]
+    movq        m1, [r6 + r1]           ; row  4
+
+    ; output row 2 <- rows 1, 2, 3, 4
+    punpcklbw   m2, m3
+    punpcklbw   m7, m0, m1
+    pmaddubsw   m2, m6
+    pmaddubsw   m7, m5
+    paddw       m2, m7
+    pmulhrsw    m2, m4
+    packuswb    m2, m2
+    movh        [r2 + 2 * r3], m2
+
+    movq        m2, [r6 + 2 * r1]       ; row  5
+
+    ; output row 3 <- rows 2, 3, 4, 5
+    punpcklbw   m3, m0
+    punpcklbw   m1, m2
+    pmaddubsw   m3, m6
+    pmaddubsw   m1, m5
+    paddw       m3, m1
+    pmulhrsw    m3, m4
+    packuswb    m3, m3
+    lea         r5, [r2 + 2 * r3]
+    movh        [r5 + r3], m3
+
+    lea         r0, [r0 + 4 * r1]       ; advance src by the 4 rows just consumed
+    lea         r2, [r2 + 4 * r3]       ; advance dst by the 4 rows just written
+
+    sub         r4d, 4
+    jnz         .loop
+    RET
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void interp_4tap_vert_pp_8x8(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;-----------------------------------------------------------------------------
+FILTER_V8_W8_H8_H16_H32 8, 8
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel
