# HG changeset patch
# User Jayashri Murugan <jayas...@multicorewareinc.com>
# Date 1500964333 -19800
#      Tue Jul 25 12:02:13 2017 +0530
# Node ID 2ad06d32a8465ce20e673c819b917a7524ecf8e9
# Parent  6b3b8ef0f37e0f7860f4f43c99e581674b19f9e3
x86: AVX512 blockcopy_ss_64x64, blockcopy_pp_64xN, blockcopy_ps_64x64 and blockcopy_sp_64x64 for HIGH_BIT_DEPTH
HIGH_BIT_DEPTH:
Primitive       | AVX2 performance | AVX512 performance
-------------------------------------------------------
copy_ss[64x64]  |      1.38x       |      2.85x
copy_pp[64x64]  |      1.91x       |      3.03x
copy_pp[64x48]  |      1.90x       |      3.21x
copy_pp[64x32]  |      1.99x       |      3.26x
copy_pp[64x16]  |      2.01x       |      3.56x
copy_ps[64x64]  |      1.78x       |      3.46x
copy_sp[64x64]  |      1.80x       |      3.25x

diff -r 6b3b8ef0f37e -r 2ad06d32a846 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Jul 21 14:55:49 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Jul 25 12:02:13 2017 +0530
@@ -2191,10 +2191,20 @@
     if (cpuMask & X265_CPU_AVX512)
     {
         p.cu[BLOCK_16x16].var = PFX(pixel_var_16x16_avx512);
+        p.cu[BLOCK_32x32].calcresidual = PFX(getResidual32_avx512);
         p.cu[BLOCK_64x64].sub_ps = PFX(pixel_sub_ps_64x64_avx512);
         p.cu[BLOCK_32x32].sub_ps = PFX(pixel_sub_ps_32x32_avx512);
         p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sub_ps = PFX(pixel_sub_ps_32x32_avx512);
         p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sub_ps = PFX(pixel_sub_ps_32x64_avx512);
+
+        // 64 X N
+        p.cu[BLOCK_64x64].copy_ss = PFX(blockcopy_ss_64x64_avx512);
+        p.pu[LUMA_64x64].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x64_avx512);
+        p.pu[LUMA_64x48].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x48_avx512);
+        p.pu[LUMA_64x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x32_avx512);
+        p.pu[LUMA_64x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x16_avx512);
+        p.cu[BLOCK_64x64].copy_ps = (copy_ps_t)PFX(blockcopy_ss_64x64_avx512);
+        p.cu[BLOCK_64x64].copy_sp = (copy_sp_t)PFX(blockcopy_ss_64x64_avx512);
     }
 }
 #else // if HIGH_BIT_DEPTH
@@ -3727,7 +3737,6 @@
         p.integral_inith[INTEGRAL_24] = PFX(integral24h_avx2);
         p.integral_inith[INTEGRAL_32] = PFX(integral32h_avx2);
 
-        p.cu[BLOCK_32x32].calcresidual = PFX(getResidual32_avx512);
     }
     if (cpuMask & X265_CPU_AVX512)
     {
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel