# HG changeset patch
# User Dnyaneshwar G <dnyanesh...@multicorewareinc.com>
# Date 1441715051 -19800
#      Tue Sep 08 17:54:11 2015 +0530
# Node ID 89c234e68523b05550b8c5197b83849544dc97d1
# Parent  365f7ed4d89628d49cd6af8d81d4edc01f73ffad
asm: AVX2 code for pixel_var primitive, improved over 40% than SSE
diff -r 365f7ed4d896 -r 89c234e68523 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Sep 08 16:38:01 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Sep 08 17:54:11 2015 +0530
@@ -2729,6 +2729,10 @@
 #if X86_64
     if (cpuMask & X265_CPU_AVX2)
     {
+        p.cu[BLOCK_16x16].var = PFX(pixel_var_16x16_avx2);
+        p.cu[BLOCK_32x32].var = PFX(pixel_var_32x32_avx2);
+        p.cu[BLOCK_64x64].var = PFX(pixel_var_64x64_avx2);
+
         p.cu[BLOCK_4x4].intra_filter = PFX(intra_filter_4x4_avx2);

         p.planecopy_sp = PFX(downShift_16_avx2);
diff -r 365f7ed4d896 -r 89c234e68523 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Tue Sep 08 16:38:01 2015 +0530
+++ b/source/common/x86/pixel-util8.asm	Tue Sep 08 17:54:11 2015 +0530
@@ -6397,6 +6397,78 @@
     movd        edx, xm6
 %endif
     RET
+
+INIT_YMM avx2
+cglobal pixel_var_32x32, 2,4,7
+    VAR_START 0
+    mov         r2d, 16
+
+.loop:
+    pmovzxbw    m0, [r0]
+    pmovzxbw    m3, [r0 + 16]
+    pmovzxbw    m1, [r0 + r1]
+    pmovzxbw    m4, [r0 + r1 + 16]
+
+    lea         r0, [r0 + r1 * 2]
+
+    VAR_CORE
+
+    dec         r2d
+    jg          .loop
+
+    vextracti128 xm0, m5, 1
+    vextracti128 xm1, m6, 1
+    paddw       xm5, xm0
+    paddd       xm6, xm1
+    HADDW       xm5, xm2
+    HADDD       xm6, xm1
+
+%if ARCH_X86_64
+    punpckldq   xm5, xm6
+    movq        rax, xm5
+%else
+    movd        eax, xm5
+    movd        edx, xm6
+%endif
+    RET
+
+INIT_YMM avx2
+cglobal pixel_var_64x64, 2,4,7
+    VAR_START 0
+    mov         r2d, 64
+
+.loop:
+    pmovzxbw    m0, [r0]
+    pmovzxbw    m3, [r0 + 16]
+    pmovzxbw    m1, [r0 + mmsize]
+    pmovzxbw    m4, [r0 + mmsize + 16]
+
+    lea         r0, [r0 + r1]
+
+    VAR_CORE
+
+    dec         r2d
+    jg          .loop
+
+    pxor        m1, m1
+    punpcklwd   m0, m5, m1
+    punpckhwd   m5, m1
+    paddd       m5, m0
+    vextracti128 xm2, m5, 1
+    vextracti128 xm1, m6, 1
+    paddd       xm5, xm2
+    paddd       xm6, xm1
+    HADDD       xm5, xm2
+    HADDD       xm6, xm1
+
+%if ARCH_X86_64
+    punpckldq   xm5, xm6
+    movq        rax, xm5
+%else
+    movd        eax, xm5
+    movd        edx, xm6
+%endif
+    RET
 %endif ; !HIGH_BIT_DEPTH

 %macro VAR2_END 3
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel