# HG changeset patch # User Vignesh Vijayakumar<vign...@multicorewareinc.com> # Date 1513077484 -19800 # Tue Dec 12 16:48:04 2017 +0530 # Node ID 42fe321e5cdf9ad260e4e5c7a64137a8b7601915 # Parent d6873e0a0786cd732304a94812a28914978113e3 x86: AVX512 optimise intra_pred_dc_32 for high bit depth
Remove the use of the phaddd instruction from the code diff -r d6873e0a0786 -r 42fe321e5cdf source/common/x86/intrapred16.asm --- a/source/common/x86/intrapred16.asm Mon Dec 11 17:13:36 2017 +0530 +++ b/source/common/x86/intrapred16.asm Tue Dec 12 16:48:04 2017 +0530 @@ -688,26 +688,25 @@ movu [r0 + r2 * 1 + 0], m0 movu [r0 + r2 * 1 + mmsize], m0 RET - INIT_ZMM avx512 -cglobal intra_pred_dc32, 3,3,17 +cglobal intra_pred_dc32, 3,3,2 add r2, 2 add r1d, r1d - movu m16, [r2] + movu m0, [r2] movu m1, [r2 + 2 * mmsize] - paddw m16, m1 - vextracti32x8 ym1, m16, 1 - paddw ym16, ym1 - vextracti32x4 xm1, m16, 1 - paddw xm16, xm1 - pmaddwd xm16, [pw_1] - movhlps xm1, xm16 - paddd xm16, xm1 - phaddd xm16, xm16 - paddd xm16, [pd_32] ; sum = sum + 32 - psrld xm16, 6 ; sum = sum / 64 - vpbroadcastw m0, xm16 - + paddw m0, m1 + vextracti32x8 ym1, m0, 1 + paddw ym0, ym1 + vextracti32x4 xm1, m0, 1 + paddw xm0, xm1 + pmaddwd xm0, [pw_1] + movhlps xm1, xm0 + paddd xm0, xm1 + vpsrldq xm1, xm0, 4 + paddd xm0, xm1 + paddd xm0, [pd_32] ; sum = sum + 32 + psrld xm0, 6 ; sum = sum / 64 + vpbroadcastw m0, xm0 lea r2, [r1 * 3] ; store DC 32x32 movu [r0 + r1 * 0 + 0], m0 _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel