# HG changeset patch
# User Vignesh Vijayakumar
# Date 1503908210 -19800
#      Mon Aug 28 13:46:50 2017 +0530
# Node ID 45e4dd746cfd9380dbe2344a5754a6ff6e9feed5
# Parent  bf199a5eca5be148be8a0c91cd9f2e8e0e908059
x86: AVX512 pixel_avg_weight_64xN
Size    | AVX2 performance | AVX512 performance
----------------------------------------------
64x16   |      41.70x      |      60.98x
64x32   |      36.75x      |      68.91x
64x48   |      37.31x      |      59.07x
64x64   |      37.92x      |      58.85x

diff -r bf199a5eca5b -r 45e4dd746cfd source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Aug 28 11:58:37 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon Aug 28 13:46:50 2017 +0530
@@ -4159,6 +4159,11 @@
         p.pu[LUMA_64x48].luma_hpp = PFX(interp_8tap_horiz_pp_64x48_avx512);
         p.pu[LUMA_64x64].luma_hpp = PFX(interp_8tap_horiz_pp_64x64_avx512);
 
+        p.pu[LUMA_64x16].pixelavg_pp = PFX(pixel_avg_64x16_avx512); // AVX512 pixel_avg kernels for the 64-wide luma partitions; defined in mc-a.asm by this same patch
+        p.pu[LUMA_64x32].pixelavg_pp = PFX(pixel_avg_64x32_avx512);
+        p.pu[LUMA_64x48].pixelavg_pp = PFX(pixel_avg_64x48_avx512);
+        p.pu[LUMA_64x64].pixelavg_pp = PFX(pixel_avg_64x64_avx512);
+
     }
 #endif
 }
diff -r bf199a5eca5b -r 45e4dd746cfd source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Mon Aug 28 11:58:37 2017 +0530
+++ b/source/common/x86/mc-a.asm	Mon Aug 28 13:46:50 2017 +0530
@@ -5020,6 +5020,58 @@
     RET
 %endif
 
+;-----------------------------------------------------------------------------
+;pixel_avg_pp avx512 code start
+;-----------------------------------------------------------------------------
+%macro PROCESS_PIXELAVG_64x4_AVX512 0 ; averages 4 rows, one vector register per row: dst (r0, stride r1) = pavgb(src0 (r2, stride r3), src1 (r4, stride r5)); does not advance any pointer
+    movu            m0,                 [r2]            ; src0 row 0
+    movu            m2,                 [r2 + r3]       ; src0 row 1
+    movu            m1,                 [r4]            ; src1 row 0
+    movu            m3,                 [r4 + r5]       ; src1 row 1
+    pavgb           m0,                 m1              ; row 0: per-byte unsigned average with rounding, (a + b + 1) >> 1
+    pavgb           m2,                 m3              ; row 1
+    movu            [r0],               m0              ; dst row 0
+    movu            [r0 + r1],          m2              ; dst row 1
+
+    movu            m0,                 [r2 + 2 * r3]   ; src0 row 2
+    movu            m2,                 [r2 + r7]       ; src0 row 3; r7 must already hold 3 * r3 (set by the expanding function)
+    movu            m1,                 [r4 + 2 * r5]   ; src1 row 2
+    movu            m3,                 [r4 + r8]       ; src1 row 3; r8 must already hold 3 * r5
+    pavgb           m0,                 m1              ; row 2
+    pavgb           m2,                 m3              ; row 3
+    movu            [r0 + 2 * r1],      m0              ; dst row 2
+    movu            [r0 + r6],          m2              ; dst row 3; r6 must already hold 3 * r1
+%endmacro
+
+;-------------------------------------------------------------------------------------------------------------------------------
+;void pixelavg_pp(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int)
+;-------------------------------------------------------------------------------------------------------------------------------
+%if ARCH_X86_64 && BIT_DEPTH == 8 ; assembled only for x86-64 builds with 8-bit pixels (the code below uses r6-r8 and byte-wise pavgb)
+%macro PIXEL_AVG_64xN_AVX512 1 ; %1 = block height in rows; assumed to be a multiple of 4, since the body emits %1/4 four-row groups
+INIT_ZMM avx512 ; mN = zmmN, so each movu/pavgb in the row macro covers 64 bytes
+cglobal pixel_avg_64x%1, 6, 9, 4 ; x86inc cglobal: 6 arguments loaded (r0-r5), 9 GPRs (r0-r8), 4 vector registers (m0-m3); the trailing int argument is never read
+    lea             r6,                 [3 * r1]        ; r6 = 3 * dstride,  offset of row 3 in dst
+    lea             r7,                 [3 * r3]        ; r7 = 3 * sstride0, offset of row 3 in src0
+    lea             r8,                 [3 * r5]        ; r8 = 3 * sstride1, offset of row 3 in src1
+
+%rep %1/4 - 1 ; unrolled at assembly time: every four-row group except the last
+    PROCESS_PIXELAVG_64x4_AVX512
+    lea             r2,                 [r2 + r3 * 4]   ; src0 += 4 rows
+    lea             r4,                 [r4 + r5 * 4]   ; src1 += 4 rows
+    lea             r0,                 [r0 + r1 * 4]   ; dst  += 4 rows
+%endrep ; the last group follows without pointer updates, as nothing reads them afterwards
+    PROCESS_PIXELAVG_64x4_AVX512
+    RET
+%endmacro
+
+PIXEL_AVG_64xN_AVX512 16
+PIXEL_AVG_64xN_AVX512 32
+PIXEL_AVG_64xN_AVX512 48
+PIXEL_AVG_64xN_AVX512 64
+%endif
+;-----------------------------------------------------------------------------
+;pixel_avg_pp avx512 code end
+;-----------------------------------------------------------------------------
 ;=============================================================================
 ; pixel avg2
 ;=============================================================================
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel