right
At 2015-03-28 10:42:44,[email protected] wrote: ># HG changeset patch ># User David T Yuen <[email protected]> ># Date 1427505936 25200 ># Node ID 3ea097e4acb7e6ce9161b79968c3653741003daf ># Parent 4314cdf34f36c7a7fc3a4b3e744cd05c6ab90352 >asm: intra_pred_ang4_12_sse2 > >This is backported from sse4 code and replaces c code. > >64-bit > >./test/TestBench --testbench intrapred | grep "intra_ang_4x4\[12\]" >intra_ang_4x4[12] 3.35x 617.45 2070.22 > >32-bit > >./test/TestBench --testbench intrapred | grep "intra_ang_4x4\[12\]" >intra_ang_4x4[12] 3.87x 757.54 2935.42 > >diff -r 4314cdf34f36 -r 3ea097e4acb7 source/common/x86/asm-primitives.cpp >--- a/source/common/x86/asm-primitives.cpp Fri Mar 27 18:20:57 2015 -0700 >+++ b/source/common/x86/asm-primitives.cpp Fri Mar 27 18:25:36 2015 -0700 >@@ -1206,6 +1206,7 @@ > p.cu[BLOCK_4x4].intra_pred[9] = x265_intra_pred_ang4_9_sse2; > p.cu[BLOCK_4x4].intra_pred[10] = x265_intra_pred_ang4_10_sse2; > p.cu[BLOCK_4x4].intra_pred[11] = x265_intra_pred_ang4_11_sse2; >+ p.cu[BLOCK_4x4].intra_pred[12] = x265_intra_pred_ang4_12_sse2; > p.cu[BLOCK_4x4].intra_pred[26] = x265_intra_pred_ang4_26_sse2; > > p.cu[BLOCK_4x4].calcresidual = x265_getResidual4_sse2; >diff -r 4314cdf34f36 -r 3ea097e4acb7 source/common/x86/intrapred.h >--- a/source/common/x86/intrapred.h Fri Mar 27 18:20:57 2015 -0700 >+++ b/source/common/x86/intrapred.h Fri Mar 27 18:25:36 2015 -0700 >@@ -57,6 +57,7 @@ > DECL_ANG(4, 9, sse2); > DECL_ANG(4, 10, sse2); > DECL_ANG(4, 11, sse2); >+DECL_ANG(4, 12, sse2); > DECL_ANG(4, 26, sse2); > > DECL_ANG(4, 2, ssse3); >diff -r 4314cdf34f36 -r 3ea097e4acb7 source/common/x86/intrapred8.asm >--- a/source/common/x86/intrapred8.asm Fri Mar 27 18:20:57 2015 -0700 >+++ b/source/common/x86/intrapred8.asm Fri Mar 27 18:25:36 2015 -0700 >@@ -1574,6 +1574,27 @@ > mova m7, [r3 + 0 * 16] ; [30] > jmp mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ > .do_filter4x4) > >+cglobal intra_pred_ang4_12, 4,5,8 >+ xor r4d, r4d >+ cmp r3m, byte 24 >+ mov r3d, 8 >+ cmove r3d, r4d >+ >+ movd m1, [r2 + r3 + 1] >+ movh m0, [r2 - 7] >+ punpcklbw m1, m1 >+ punpcklqdq m0, m1 >+ psrldq m0, 7 >+ punpcklqdq m0, m0 >+ mova m2, m0 >+ >+ lea r3, [pw_ang_table + 20 * 16] >+ mova m4, [r3 + 7 * 16] ; [27] >+ mova m5, [r3 + 2 * 16] ; [22] >+ mova m6, [r3 - 3 * 16] ; [17] >+ mova m7, [r3 - 8 * 16] ; [12] >+ jmp mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ >.do_filter4x4) >+ > ;--------------------------------------------------------------------------------------------- > ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int > dirMode, int bFilter) > ;--------------------------------------------------------------------------------------------- >_______________________________________________ >x265-devel mailing list >[email protected] >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
