# HG changeset patch
# User Jayashri Murugan <jayas...@multicorewareinc.com>
# Date 1500964406 -19800
#      Tue Jul 25 12:03:26 2017 +0530
# Node ID 4978d583e2e82aec1f09d94ecdf52191eac7ceb5
# Parent  2ad06d32a8465ce20e673c819b917a7524ecf8e9
x86: AVX512 blockcopy_ss_32x32, blockcopy_pp_32xN, blockcopy_ps_32x32 and blockcopy_sp_32x32 for HIGH_BIT_DEPTH
HIGH_BIT_DEPTH:

Primitive      | AVX2 performance | AVX512 performance
-------------------------------------------------------
copy_ss[32x32] | 1.83x            | 3.91x
copy_pp[32x64] | 1.98x            | 3.22x
copy_pp[32x32] | 1.91x            | 4.11x
copy_pp[32x24] | 2.06x            | 4.20x
copy_pp[32x16] | 1.80x            | 4.60x
copy_ps[32x32] | 1.90x            | 4.60x
copy_sp[32x32] | 2.02x            | 4.78x

diff -r 2ad06d32a846 -r 4978d583e2e8 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Tue Jul 25 12:02:13 2017 +0530 +++ b/source/common/x86/asm-primitives.cpp Tue Jul 25 12:03:26 2017 +0530 @@ -2205,6 +2205,31 @@ p.pu[LUMA_64x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x16_avx512); p.cu[BLOCK_64x64].copy_ps = (copy_ps_t)PFX(blockcopy_ss_64x64_avx512); p.cu[BLOCK_64x64].copy_sp = (copy_sp_t)PFX(blockcopy_ss_64x64_avx512); + + // 32 X N + p.cu[BLOCK_32x32].copy_ss = PFX(blockcopy_ss_32x32_avx512); + p.pu[LUMA_32x64].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x64_avx512); + p.pu[LUMA_32x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x32_avx512); + p.pu[LUMA_32x24].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x24_avx512); + p.pu[LUMA_32x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x16_avx512); + p.pu[LUMA_32x8].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x8_avx512); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x8_avx512); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x16_avx512); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x24_avx512); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x32_avx512); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x16_avx512); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x32_avx512); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].copy_pp = (copy_pp_t)PFX(blockcopy_ss_32x48_avx512); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].copy_pp = 
(copy_pp_t)PFX(blockcopy_ss_32x64_avx512); + p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].copy_ss = PFX(blockcopy_ss_32x32_avx512); + p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].copy_ss = PFX(blockcopy_ss_32x64_avx512); + p.cu[BLOCK_32x32].copy_ps = (copy_ps_t)PFX(blockcopy_ss_32x32_avx512); + p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].copy_ps = (copy_ps_t)PFX(blockcopy_ss_32x32_avx512); + p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].copy_ps = (copy_ps_t)PFX(blockcopy_ss_32x64_avx512); + p.cu[BLOCK_32x32].copy_sp = (copy_sp_t)PFX(blockcopy_ss_32x32_avx512); + p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].copy_sp = (copy_sp_t)PFX(blockcopy_ss_32x32_avx512); + p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].copy_sp = (copy_sp_t)PFX(blockcopy_ss_32x64_avx512); + } } #else // if HIGH_BIT_DEPTH _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel