right
At 2015-02-25 17:34:35,"Divya Manivannan" <[email protected]> wrote: ># HG changeset patch ># User Divya Manivannan <[email protected]> ># Date 1424856844 -19800 ># Wed Feb 25 15:04:04 2015 +0530 ># Node ID d4ab6285fbe3c607f4355f3a9ced1f98686a070c ># Parent 34b53460c45c69a5b9e318c094c1c8c7744b4e99 >asm-avx2: filter_vpp[16x8], filter_vps[16x8]: improve 567c->530c, 543c->420c > >diff -r 34b53460c45c -r d4ab6285fbe3 source/common/x86/asm-primitives.cpp >--- a/source/common/x86/asm-primitives.cpp Wed Feb 25 14:24:55 2015 +0530 >+++ b/source/common/x86/asm-primitives.cpp Wed Feb 25 15:04:04 2015 +0530 >@@ -1801,12 +1801,15 @@ > p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_vpp = > x265_interp_4tap_vert_pp_2x4_avx2; > p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].filter_vpp = > x265_interp_4tap_vert_pp_4x8_avx2; > p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vpp = > x265_interp_4tap_vert_pp_8x4_avx2; >+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_vpp = >x265_interp_4tap_vert_pp_16x8_avx2; > > p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_vps = > x265_interp_4tap_vert_ps_2x4_avx2; > p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_vps = > x265_interp_4tap_vert_ps_4x4_avx2; > p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].filter_vps = > x265_interp_4tap_vert_ps_4x8_avx2; > p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vps = > x265_interp_4tap_vert_ps_8x4_avx2; > p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_vps = > x265_interp_4tap_vert_ps_8x8_avx2; >+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_vps = >x265_interp_4tap_vert_ps_16x8_avx2; >+ > // color space i422 > p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_vpp = > x265_interp_4tap_vert_pp_4x4_avx2; > >diff -r 34b53460c45c -r d4ab6285fbe3 source/common/x86/ipfilter8.asm >--- a/source/common/x86/ipfilter8.asm Wed Feb 25 14:24:55 2015 +0530 >+++ b/source/common/x86/ipfilter8.asm Wed Feb 25 15:04:04 2015 +0530 >@@ -4626,6 +4626,162 @@ > FILTER_VER_CHROMA_AVX2_16x16 pp > FILTER_VER_CHROMA_AVX2_16x16 ps > >+%macro FILTER_VER_CHROMA_AVX2_16x8 1 >+INIT_YMM avx2 >+cglobal interp_4tap_vert_%1_16x8, 4, 7, 7 >+ mov r4d, r4m >+ shl r4d, 6 >+ >+%ifdef PIC >+ lea r5, [tab_ChromaCoeffVer_32] >+ add r5, r4 >+%else >+ lea r5, [tab_ChromaCoeffVer_32 + r4] >+%endif >+ >+ lea r4, [r1 * 3] >+ sub r0, r1 >+%ifidn %1,pp >+ mova m6, [pw_512] >+%else >+ add r3d, r3d >+ mova m6, [pw_2000] >+%endif >+ lea r6, [r3 * 3] >+ >+ movu xm0, [r0] ; m0 = row 0 >+ movu xm1, [r0 + r1] ; m1 = row 1 >+ punpckhbw xm2, xm0, xm1 >+ punpcklbw xm0, xm1 >+ vinserti128 m0, m0, xm2, 1 >+ pmaddubsw m0, [r5] >+ movu xm2, [r0 + r1 * 2] ; m2 = row 2 >+ punpckhbw xm3, xm1, xm2 >+ punpcklbw xm1, xm2 >+ vinserti128 m1, m1, xm3, 1 >+ pmaddubsw m1, [r5] >+ movu xm3, [r0 + r4] ; m3 = row 3 >+ punpckhbw xm4, xm2, xm3 >+ punpcklbw xm2, xm3 >+ vinserti128 m2, m2, xm4, 1 >+ pmaddubsw m4, m2, [r5 + mmsize] >+ paddw m0, m4 >+ pmaddubsw m2, [r5] >+ lea r0, [r0 + r1 * 4] >+ movu xm4, [r0] ; m4 = row 4 >+ punpckhbw xm5, xm3, xm4 >+ punpcklbw xm3, xm4 >+ vinserti128 m3, m3, xm5, 1 >+ pmaddubsw m5, m3, [r5 + mmsize] >+ paddw m1, m5 >+ pmaddubsw m3, [r5] >+%ifidn %1,pp >+ pmulhrsw m0, m6 ; m0 = word: row 0 >+ pmulhrsw m1, m6 ; m1 = word: row 1 >+ packuswb m0, m1 >+ vpermq m0, m0, 11011000b >+ vextracti128 xm1, m0, 1 >+ movu [r2], xm0 >+ movu [r2 + r3], xm1 >+%else >+ psubw m0, m6 ; m0 = word: row 0 >+ psubw m1, m6 ; m1 = word: row 1 >+ movu [r2], m0 >+ movu [r2 + r3], m1 >+%endif >+ >+ movu xm0, [r0 + r1] ; m0 = row 5 >+ punpckhbw xm1, xm4, xm0 >+ punpcklbw xm4, xm0 >+ vinserti128 m4, m4, xm1, 1 >+ pmaddubsw m1, m4, [r5 + mmsize] >+ paddw m2, m1 >+ pmaddubsw m4, [r5] >+ movu xm1, [r0 + r1 * 2] ; m1 = row 6 >+ punpckhbw xm5, xm0, xm1 >+ punpcklbw xm0, xm1 >+ vinserti128 m0, m0, xm5, 1 >+ pmaddubsw m5, m0, [r5 + mmsize] >+ paddw m3, m5 >+ pmaddubsw m0, [r5] >+%ifidn %1,pp >+ pmulhrsw m2, m6 ; m2 = word: row 2 >+ pmulhrsw m3, m6 ; m3 = word: row 3 >+ packuswb m2, m3 >+ vpermq m2, m2, 11011000b >+ vextracti128 xm3, m2, 1 >+ movu [r2 + r3 * 2], xm2 >+ movu [r2 + r6], xm3 >+%else >+ psubw m2, m6 ; m2 = word: row 2 >+ psubw m3, m6 ; m3 = word: row 3 >+ movu [r2 + r3 * 2], m2 >+ movu [r2 + r6], m3 >+%endif >+ >+ movu xm2, [r0 + r4] ; m2 = row 7 >+ punpckhbw xm3, xm1, xm2 >+ punpcklbw xm1, xm2 >+ vinserti128 m1, m1, xm3, 1 >+ pmaddubsw m3, m1, [r5 + mmsize] >+ paddw m4, m3 >+ pmaddubsw m1, [r5] >+ lea r0, [r0 + r1 * 4] >+ movu xm3, [r0] ; m3 = row 8 >+ punpckhbw xm5, xm2, xm3 >+ punpcklbw xm2, xm3 >+ vinserti128 m2, m2, xm5, 1 >+ pmaddubsw m5, m2, [r5 + mmsize] >+ paddw m0, m5 >+ pmaddubsw m2, [r5] >+ lea r2, [r2 + r3 * 4] >+%ifidn %1,pp >+ pmulhrsw m4, m6 ; m4 = word: row 4 >+ pmulhrsw m0, m6 ; m0 = word: row 5 >+ packuswb m4, m0 >+ vpermq m4, m4, 11011000b >+ vextracti128 xm0, m4, 1 >+ movu [r2], xm4 >+ movu [r2 + r3], xm0 >+%else >+ psubw m4, m6 ; m4 = word: row 4 >+ psubw m0, m6 ; m0 = word: row 5 >+ movu [r2], m4 >+ movu [r2 + r3], m0 >+%endif >+ >+ movu xm5, [r0 + r1] ; m5 = row 9 >+ punpckhbw xm4, xm3, xm5 >+ punpcklbw xm3, xm5 >+ vinserti128 m3, m3, xm4, 1 >+ pmaddubsw m3, [r5 + mmsize] >+ paddw m1, m3 >+ movu xm4, [r0 + r1 * 2] ; m4 = row 10 >+ punpckhbw xm0, xm5, xm4 >+ punpcklbw xm5, xm4 >+ vinserti128 m5, m5, xm0, 1 >+ pmaddubsw m5, [r5 + mmsize] >+ paddw m2, m5 >+%ifidn %1,pp >+ pmulhrsw m1, m6 ; m1 = word: row 6 >+ pmulhrsw m2, m6 ; m2 = word: row 7 >+ packuswb m1, m2 >+ vpermq m1, m1, 11011000b >+ vextracti128 xm2, m1, 1 >+ movu [r2 + r3 * 2], xm1 >+ movu [r2 + r6], xm2 >+%else >+ psubw m1, m6 ; m1 = word: row 6 >+ psubw m2, m6 ; m2 = word: row 7 >+ movu [r2 + r3 * 2], m1 >+ movu [r2 + r6], m2 >+%endif >+ RET >+%endmacro >+ >+FILTER_VER_CHROMA_AVX2_16x8 pp >+FILTER_VER_CHROMA_AVX2_16x8 ps >+ > ;----------------------------------------------------------------------------- > ;void interp_4tap_vert_pp_24x32(pixel *src, intptr_t srcStride, pixel *dst, > intptr_t dstStride, int coeffIdx) > ;----------------------------------------------------------------------------- >_______________________________________________ >x265-devel mailing list >[email protected] >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
