Some problems: 1. The coefficients are loaded from the constant array with unaligned loads, e.g. 'movu m0, [r4+x]'. 2. '%rep' generates large code; it is faster on the testbench but slower in the encoder.
3. The patch disables two of the 8x8 functions (the SETUP_INTRA_ANG8 entries for modes 3 and 33 are removed). At 2014-02-03 15:06:59,[email protected] wrote: ># HG changeset patch ># User Dnyaneshwar G <[email protected]> ># Date 1391410961 -19800 ># Mon Feb 03 12:32:41 2014 +0530 ># Node ID 7ad3e3504ea6e5f7355b21c4c7de44ad9e1c0a2a ># Parent aab88ed133647b779b0a1ca33a1e20584103ef7d >asm: assembly code for IntraAng32x32 all modes > >diff -r aab88ed13364 -r 7ad3e3504ea6 source/common/x86/asm-primitives.cpp >--- a/source/common/x86/asm-primitives.cpp Sun Feb 02 13:09:26 2014 -0600 >+++ b/source/common/x86/asm-primitives.cpp Mon Feb 03 12:32:41 2014 +0530 >@@ -572,9 +572,6 @@ > #define SETUP_INTRA_ANG32(mode, fno, cpu) \ > p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## > cpu; > >-#define SETUP_INTRA_ANG32(mode, fno, cpu) \ >- p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## >cpu; >- > namespace x265 { > // private x265 namespace > void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) >@@ -1013,10 +1010,37 @@ > SETUP_INTRA_ANG4(32, 4, sse4); > SETUP_INTRA_ANG4(33, 3, sse4); > >+ SETUP_INTRA_ANG32(3, 3, sse4); >+ SETUP_INTRA_ANG32(4, 4, sse4); >+ SETUP_INTRA_ANG32(5, 5, sse4); >+ SETUP_INTRA_ANG32(6, 6, sse4); >+ SETUP_INTRA_ANG32(7, 7, sse4); >+ SETUP_INTRA_ANG32(8, 8, sse4); >+ SETUP_INTRA_ANG32(9, 9, sse4); >+ SETUP_INTRA_ANG32(10, 10, sse4); >+ SETUP_INTRA_ANG32(11, 11, sse4); >+ SETUP_INTRA_ANG32(12, 12, sse4); >+ SETUP_INTRA_ANG32(13, 13, sse4); >+ SETUP_INTRA_ANG32(14, 14, sse4); >+ SETUP_INTRA_ANG32(15, 15, sse4); >+ SETUP_INTRA_ANG32(16, 16, sse4); > SETUP_INTRA_ANG32(17, 17, sse4); >- >- SETUP_INTRA_ANG8(3, 3, sse4); >- SETUP_INTRA_ANG8(33, 3, sse4); >+ SETUP_INTRA_ANG32(18, 18, sse4);
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
