# HG changeset patch
# User Vignesh Vijayakumar<vign...@multicorewareinc.com>
# Date 1515750476 -19800
#      Fri Jan 12 15:17:56 2018 +0530
# Node ID e4983d90f403d968d6760ae044f86a7a2e1865a2
# Parent  1c2875198a213a5f8d84bff57fcec15727f94a4f
x86: AVX512 pixel_satd_64xN
Size    | AVX2 performance | AVX512 performance
-----------------------------------------------
64x16   |      10.73x      |      13.02x
64x32   |      11.13x      |      13.21x
64x48   |      11.13x      |      13.19x
64x64   |      11.36x      |      13.78x

diff -r 1c2875198a21 -r e4983d90f403 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Jan 12 13:52:25 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Jan 12 15:17:56 2018 +0530
@@ -5347,7 +5347,10 @@
         p.pu[LUMA_32x24].satd = PFX(pixel_satd_32x24_avx512);
         p.pu[LUMA_32x32].satd = PFX(pixel_satd_32x32_avx512);
         p.pu[LUMA_32x64].satd = PFX(pixel_satd_32x64_avx512);
-
+        p.pu[LUMA_64x16].satd = PFX(pixel_satd_64x16_avx512);
+        p.pu[LUMA_64x32].satd = PFX(pixel_satd_64x32_avx512);
+        p.pu[LUMA_64x48].satd = PFX(pixel_satd_64x48_avx512);
+        p.pu[LUMA_64x64].satd = PFX(pixel_satd_64x64_avx512);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].satd = PFX(pixel_satd_32x32_avx512);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].satd = PFX(pixel_satd_32x16_avx512);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].satd = PFX(pixel_satd_32x24_avx512);
diff -r 1c2875198a21 -r e4983d90f403 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Fri Jan 12 13:52:25 2018 +0530
+++ b/source/common/x86/pixel-a.asm	Fri Jan 12 15:17:56 2018 +0530
@@ -14163,6 +14163,51 @@
 SATD_32xN_AVX512 32
 SATD_32xN_AVX512 48
 SATD_32xN_AVX512 64
+
+;-----------------------------------------------------------------------------
+; SATD_64xN_AVX512 <rows>
+;   Defines pixel_satd_64x<rows> for the ZMM (avx512) target.
+;   The block is handled as two column halves that start mmsize/2 bytes
+;   apart; each half is walked with one PROCESS_SATD_32x4_AVX512 call per
+;   step (rows/4 calls per half), stepping r0/r2 by 4 * r1 / 4 * r3.
+;   PROCESS_SATD_32x4_AVX512 and SATD_MAIN_AVX512_END are not defined in this
+;   hunk.  NOTE(review): m6 is cleared below and is presumably the running sum
+;   those helpers accumulate into and reduce -- confirm against them.
+;-----------------------------------------------------------------------------
+%macro SATD_64xN_AVX512 1
+INIT_ZMM avx512
+cglobal pixel_satd_64x%1, 4,8,8
+    lea             r4, [3 * r1]
+    lea             r5, [3 * r3]
+    pxor            m6, m6
+    mov             r6, r0                  ; remember the entry value of r0 ...
+    mov             r7, r2                  ; ... and r2 for the second half
+
+    ; first half: rows/4 steps, r0/r2 advance by 4 * r1 / 4 * r3 between steps
+%rep %1/4 - 1
+    PROCESS_SATD_32x4_AVX512
+    lea             r0, [r0 + 4 * r1]
+    lea             r2, [r2 + 4 * r3]
+%endrep
+    PROCESS_SATD_32x4_AVX512
+
+    ; second half: restart from the saved pointers, mmsize/2 bytes further in
+    lea             r0, [r6 + mmsize/2]
+    lea             r2, [r7 + mmsize/2]
+%rep %1/4 - 1
+    PROCESS_SATD_32x4_AVX512
+    lea             r0, [r0 + 4 * r1]
+    lea             r2, [r2 + 4 * r3]
+%endrep
+    PROCESS_SATD_32x4_AVX512
+    SATD_MAIN_AVX512_END
+    RET
+%endmacro
+
+SATD_64xN_AVX512 16
+SATD_64xN_AVX512 32
+SATD_64xN_AVX512 48
+SATD_64xN_AVX512 64
 %endif ; ARCH_X86_64 == 1 && HIGH_BIT_DEPTH == 0
 %if ARCH_X86_64 == 1 && HIGH_BIT_DEPTH == 1
 INIT_YMM avx2
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel