# HG changeset patch
# User Vignesh Vijayakumar<vign...@multicorewareinc.com>
# Date 1515559539 -19800
#      Wed Jan 10 10:15:39 2018 +0530
# Node ID a4d60c45fdce6797486f25f5f319615b25bd86f0
# Parent  a5d29083237f28a944143862f980960c3f2b15ff
x86: AVX512 optimise cpy1Dto2D_shr

Do the shift work in xmm registers when possible, then broadcast the result into m1 with vpbroadcastw.

diff -r a5d29083237f -r a4d60c45fdce source/common/x86/blockcopy8.asm --- a/source/common/x86/blockcopy8.asm Mon Jan 08 16:26:59 2018 +0530 +++ b/source/common/x86/blockcopy8.asm Wed Jan 10 10:15:39 2018 +0530 @@ -6781,10 +6781,10 @@ cglobal cpy1Dto2D_shr_16, 3, 5, 4 shl r2d, 1 movd xm0, r3m - pcmpeqw ymm1, ymm1 - psllw ym1, ymm1, xm0 - psraw ym1, 1 - vinserti32x8 m1, ym1, 1 + pcmpeqw xmm1, xmm1 + psllw xm1, xmm1, xm0 + psraw xm1, 1 + vpbroadcastw m1, xm1 mov r3d, 4 lea r4, [r2 * 3] @@ -6903,10 +6903,10 @@ cglobal cpy1Dto2D_shr_32, 3, 4, 6 shl r2d, 1 movd xm0, r3m - pcmpeqw ymm1, ymm1 - psllw ym1, ymm1, xm0 - psraw ym1, 1 - vinserti32x8 m1, ym1, 1 + pcmpeqw xmm1, xmm1 + psllw xm1, xmm1, xm0 + psraw xm1, 1 + vpbroadcastw m1, xm1 mov r3d, 16 .loop: _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel