# HG changeset patch
# User Kalyan Goswami<kal...@multicorewareinc.com>
# Date 1500980022 -19800
#      Tue Jul 25 16:23:42 2017 +0530
# Node ID 9e1401dcdfc3c9fb633d81b7b39321ac5969a245
# Parent  723c72ffe3eacba3db73eb46332f7cf5c97efa8a
x86:AVX-512 blockfill_s_32x32
Size | AVX2 performance | AVX512 performance
------------------------------------------------
32x32 | 4.58x | 9.73x

diff -r 723c72ffe3ea -r 9e1401dcdfc3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Jul 25 16:17:13 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Jul 25 16:23:42 2017 +0530
@@ -3866,6 +3866,8 @@
         p.pu[LUMA_64x48].addAvg = PFX(addAvg_64x48_avx512);
         p.pu[LUMA_64x64].addAvg = PFX(addAvg_64x64_avx512);
 
+        p.cu[BLOCK_32x32].blockfill_s = PFX(blockfill_s_32x32_avx512);
+
         p.cu[BLOCK_64x64].add_ps = PFX(pixel_add_ps_64x64_avx512);
         p.cu[BLOCK_32x32].add_ps = PFX(pixel_add_ps_32x32_avx512);
         p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps = PFX(pixel_add_ps_32x32_avx2);
diff -r 723c72ffe3ea -r 9e1401dcdfc3 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Tue Jul 25 16:17:13 2017 +0530
+++ b/source/common/x86/blockcopy8.asm	Tue Jul 25 16:23:42 2017 +0530
@@ -2484,6 +2484,25 @@
     movu       [r0 + r3 + 32], m0
     RET
 
+;--------------------------------------------------------------------
+; void blockfill_s_32x32(int16_t* dst, intptr_t dstride, int16_t val)
+;--------------------------------------------------------------------
+INIT_ZMM avx512
+cglobal blockfill_s_32x32, 3, 4, 1      ; x86inc: 3 args (r0=dst, r1=dstride, r2=val), 4 GPRs (r0-r3), 1 vector reg (m0)
+add          r1, r1                     ; r1 = 2 * dstride: int16_t element stride -> byte stride
+lea          r3, [3 * r1]               ; r3 = 3 * byte stride, offset of the 4th row in each group
+movd         xm0, r2d                   ; low dword of xm0 = val
+vpbroadcastw m0, xm0                    ; replicate the low 16-bit word of xm0 into every word of m0
+
+%rep 8                                  ; 8 unrolled groups of 4 rows = 32 rows
+movu       [r0], m0                     ; row 0: one full-width (ZMM, 64-byte) store = 32 int16_t
+movu       [r0 + r1], m0                ; row 1
+movu       [r0 + 2 * r1], m0            ; row 2
+movu       [r0 + r3], m0                ; row 3
+lea        r0, [r0 + 4 * r1]            ; advance dst by 4 rows (lea leaves flags untouched)
+%endrep
+RET
+
 ;-----------------------------------------------------------------------------
 ; void blockcopy_ps_2x4(int16_t* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
 ;-----------------------------------------------------------------------------
diff -r 723c72ffe3ea -r 9e1401dcdfc3 source/common/x86/blockcopy8.h
--- a/source/common/x86/blockcopy8.h	Tue Jul 25 16:17:13 2017 +0530
+++ b/source/common/x86/blockcopy8.h	Tue Jul 25 16:23:42 2017 +0530
@@ -47,6 +47,7 @@
 
 FUNCDEF_TU(void, blockfill_s, sse2, int16_t* dst, intptr_t dstride, int16_t val);
 FUNCDEF_TU(void, blockfill_s, avx2, int16_t* dst, intptr_t dstride, int16_t val);
+FUNCDEF_TU(void, blockfill_s, avx512, int16_t* dst, intptr_t dstride, int16_t val);
 
 FUNCDEF_CHROMA_PU(void, blockcopy_ss, sse2, int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
 FUNCDEF_CHROMA_PU(void, blockcopy_ss, avx, int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel