# HG changeset patch
# User Jayashri Murugan <jayas...@multicorewareinc.com>
# Date 1517294626 -19800
#      Tue Jan 30 12:13:46 2018 +0530
# Node ID b80e844209ecd0abc896df94306a5ef96b27b918
# Parent  e82bfd58acb99cd4c2e4767b1afdd3750881a68e
X86: AVX512 intra_pred_ang16 mode 8 and 28 high bit depth

Mode | AVX2 performance | AVX512 performance
---------------------------------------------------
8    | 9.31x            | 10.78x
28   | 12.80x           | 15.21x

diff -r e82bfd58acb9 -r b80e844209ec source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Jan 19 16:56:49 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Jan 30 12:13:46 2018 +0530
@@ -3113,14 +3113,14 @@
         p.cu[BLOCK_32x32].intra_pred[6] = PFX(intra_pred_ang32_6_avx512);
         p.cu[BLOCK_32x32].intra_pred[29] = PFX(intra_pred_ang32_29_avx512);
         p.cu[BLOCK_32x32].intra_pred[7] = PFX(intra_pred_ang32_7_avx512);
-
+        p.cu[BLOCK_32x32].intra_pred[8] = PFX(intra_pred_ang32_8_avx512);
+        p.cu[BLOCK_32x32].intra_pred[28] = PFX(intra_pred_ang32_28_avx512);
         p.cu[BLOCK_16x16].intra_pred[9] = PFX(intra_pred_ang16_9_avx512);
         p.cu[BLOCK_16x16].intra_pred[11] = PFX(intra_pred_ang16_11_avx512);
         p.cu[BLOCK_16x16].intra_pred[25] = PFX(intra_pred_ang16_25_avx512);
         p.cu[BLOCK_16x16].intra_pred[27] = PFX(intra_pred_ang16_27_avx512);
-        p.cu[BLOCK_32x32].intra_pred[8] = PFX(intra_pred_ang32_8_avx512);
-        p.cu[BLOCK_32x32].intra_pred[28] = PFX(intra_pred_ang32_28_avx512);
-
+        p.cu[BLOCK_16x16].intra_pred[8] = PFX(intra_pred_ang16_8_avx512);
+        p.cu[BLOCK_16x16].intra_pred[28] = PFX(intra_pred_ang16_28_avx512);
         p.cu[BLOCK_16x16].intra_pred[5] = PFX(intra_pred_ang16_5_avx512);
         p.cu[BLOCK_16x16].intra_pred[31] = PFX(intra_pred_ang16_31_avx512);
         p.cu[BLOCK_16x16].intra_pred[4] = PFX(intra_pred_ang16_4_avx512);
diff -r e82bfd58acb9 -r b80e844209ec source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Fri Jan 19 16:56:49 2018 +0530
+++ b/source/common/x86/intrapred16.asm Tue Jan 30 12:13:46 2018 +0530
@@ -11843,6 +11843,27 @@
     packusdw m11, m3
     TRANSPOSE_STORE_AVX2 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 16
     ret
+cglobal intra_pred_ang16_8, 3,7,16
+    add r2, 64
+    xor r6d, r6d
+    lea r3, [ang_table_avx2 + 15 * 32]
+    add r1d, r1d
+    lea r4, [r1 * 3]
+    vbroadcasti32x8 m15, [pd_16]
+
+    call ang16_mode_8_28
+    RET
+
+cglobal intra_pred_ang16_28, 3,7,16
+    xor r6d, r6d
+    inc r6d
+    lea r3, [ang_table_avx2 + 15 * 32]
+    add r1d, r1d
+    lea r4, [r1 * 3]
+    vbroadcasti32x8 m15, [pd_16]
+
+    call ang16_mode_8_28
+    RET
 
 ;; angle 16, modes 7 and 29
 cglobal ang16_mode_7_29
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel