It is right now. Apart from the last part, where one copy could still be reduced, no new patch is needed.
At 2014-01-15 21:49:59,[email protected] wrote: ># HG changeset patch ># User Dnyaneshwar G <[email protected]> ># Date 1389793733 -19800 ># Wed Jan 15 19:18:53 2014 +0530 ># Node ID fec35802092cec9416924eabc072ee0993df34e1 ># Parent 27c2dac98a3c34e6e1b03960d296ef8f2295fefd >asm: code for intra_pred[BLOCK_32x32] mode 2 and 34 > >diff -r 27c2dac98a3c -r fec35802092c source/common/x86/asm-primitives.cpp >--- a/source/common/x86/asm-primitives.cpp Tue Dec 17 13:14:20 2013 +0530 >+++ b/source/common/x86/asm-primitives.cpp Wed Jan 15 19:18:53 2014 +0530 >@@ -554,6 +554,9 @@ > #define SETUP_INTRA_ANG16(mode, fno, cpu) \ > p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## > cpu; > >+#define SETUP_INTRA_ANG32(mode, fno, cpu) \ >+ p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## >cpu; >+ > namespace x265 { > // private x265 namespace > void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) >@@ -896,6 +899,8 @@ > SETUP_INTRA_ANG8(34, 2, ssse3); > SETUP_INTRA_ANG16(2, 2, ssse3); > SETUP_INTRA_ANG16(34, 2, ssse3); >+ SETUP_INTRA_ANG32(2, 2, ssse3); >+ SETUP_INTRA_ANG32(34, 2, ssse3); > > p.scale1D_128to64 = x265_scale1D_128to64_ssse3; > p.scale2D_64to32 = x265_scale2D_64to32_ssse3; >diff -r 27c2dac98a3c -r fec35802092c source/common/x86/intrapred.h >--- a/source/common/x86/intrapred.h Tue Dec 17 13:14:20 2013 +0530 >+++ b/source/common/x86/intrapred.h Wed Jan 15 19:18:53 2014 +0530 >@@ -95,6 +95,25 @@ > DECL_ANG(16, 18, sse4); > DECL_ANG(16, 26, sse4); > >+DECL_ANG(32, 2, ssse3); >+DECL_ANG(32, 3, sse4); >+DECL_ANG(32, 4, sse4); >+DECL_ANG(32, 5, sse4); >+DECL_ANG(32, 6, sse4); >+DECL_ANG(32, 7, sse4); >+DECL_ANG(32, 8, sse4); >+DECL_ANG(32, 9, sse4); >+DECL_ANG(32, 10, sse4); >+DECL_ANG(32, 11, sse4); >+DECL_ANG(32, 12, sse4); >+DECL_ANG(32, 13, sse4); >+DECL_ANG(32, 14, sse4); >+DECL_ANG(32, 15, sse4); >+DECL_ANG(32, 16, sse4); >+DECL_ANG(32, 17, sse4); >+DECL_ANG(32, 18, sse4); >+DECL_ANG(32, 26, sse4); >+ > #undef 
DECL_ANG > void x265_all_angs_pred_4x4_sse4(pixel *dest, pixel *above0, pixel *left0, > pixel *above1, pixel *left1, bool bLuma); > void x265_all_angs_pred_8x8_sse4(pixel *dest, pixel *above0, pixel *left0, > pixel *above1, pixel *left1, bool bLuma); >diff -r 27c2dac98a3c -r fec35802092c source/common/x86/intrapred8.asm >--- a/source/common/x86/intrapred8.asm Tue Dec 17 13:14:20 2013 +0530 >+++ b/source/common/x86/intrapred8.asm Wed Jan 15 19:18:53 2014 +0530 >@@ -1182,6 +1182,168 @@ > movu [r0 + r1], m2 > RET > >+ >+;--------------------------------------------------------------------------------------------------------------- >+; void intraPredAng32(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel >*refAbove, int dirMode, int bFilter) >+;--------------------------------------------------------------------------------------------------------------- >+INIT_XMM ssse3 >+cglobal intra_pred_ang32_2, 3,4,4 >+ cmp r4m, byte 34 >+ cmove r2, r3mp >+ movu m0, [r2 + 2] >+ movu m1, [r2 + 18] >+ movu m3, [r2 + 34] >+ >+ lea r3, [r1 * 3] >+ >+ movu [r0], m0 >+ movu [r0 + 16], m1 >+ palignr m2, m1, m0, 1 >+ movu [r0 + r1], m2 >+ palignr m2, m3, m1, 1 >+ movu [r0 + r1 + 16], m2 >+ palignr m2, m1, m0, 2 >+ movu [r0 + r1 * 2], m2 >+ palignr m2, m3, m1, 2 >+ movu [r0 + r1 * 2 + 16], m2 >+ palignr m2, m1, m0, 3 >+ movu [r0 + r3], m2 >+ palignr m2, m3, m1, 3 >+ movu [r0 + r3 + 16], m2 >+ >+ lea r0, [r0 + r1 * 4] >+ >+ palignr m2, m1, m0, 4 >+ movu [r0], m2 >+ palignr m2, m3, m1, 4 >+ movu [r0 + 16], m2 >+ palignr m2, m1, m0, 5 >+ movu [r0 + r1], m2 >+ palignr m2, m3, m1, 5 >+ movu [r0 + r1 + 16], m2 >+ palignr m2, m1, m0, 6 >+ movu [r0 + r1 * 2], m2 >+ palignr m2, m3, m1, 6 >+ movu [r0 + r1 * 2 + 16], m2 >+ palignr m2, m1, m0, 7 >+ movu [r0 + r3], m2 >+ palignr m2, m3, m1, 7 >+ movu [r0 + r3 + 16], m2 >+ >+ lea r0, [r0 + r1 * 4] >+ >+ palignr m2, m1, m0, 8 >+ movu [r0], m2 >+ palignr m2, m3, m1, 8 >+ movu [r0 + 16], m2 >+ palignr m2, m1, m0, 9 >+ movu [r0 + r1], m2 >+ palignr m2, 
m3, m1, 9 >+ movu [r0 + r1 + 16], m2 >+ palignr m2, m1, m0, 10 >+ movu [r0 + r1 * 2], m2 >+ palignr m2, m3, m1, 10 >+ movu [r0 + r1 * 2 + 16], m2 >+ palignr m2, m1, m0, 11 >+ movu [r0 + r3], m2 >+ palignr m2, m3, m1, 11 >+ movu [r0 + r3 + 16], m2 >+ >+ lea r0, [r0 + r1 * 4] >+ >+ palignr m2, m1, m0, 12 >+ movu [r0], m2 >+ palignr m2, m3, m1, 12 >+ movu [r0 + 16], m2 >+ palignr m2, m1, m0, 13 >+ movu [r0 + r1], m2 >+ palignr m2, m3, m1, 13 >+ movu [r0 + r1 + 16], m2 >+ palignr m2, m1, m0, 14 >+ movu [r0 + r1 * 2], m2 >+ palignr m2, m3, m1, 14 >+ movu [r0 + r1 * 2 + 16], m2 >+ palignr m2, m1, m0, 15 >+ movu [r0 + r3], m2 >+ palignr m2, m3, m1, 15 >+ movu [r0 + r3 + 16], m2 >+ >+ lea r0, [r0 + r1 * 4] >+ >+ movu [r0], m1 >+ movu m0, [r2 + 50] >+ movu [r0 + 16], m3 >+ palignr m2, m3, m1, 1 >+ movu [r0 + r1], m2 >+ palignr m2, m0, m3, 1 >+ movu [r0 + r1 + 16], m2 >+ palignr m2, m3, m1, 2 >+ movu [r0 + r1 * 2], m2 >+ palignr m2, m0, m3, 2 >+ movu [r0 + r1 * 2 + 16], m2 >+ palignr m2, m3, m1, 3 >+ movu [r0 + r3], m2 >+ palignr m2, m0, m3, 3 >+ movu [r0 + r3 + 16], m2 >+ >+ lea r0, [r0 + r1 * 4] >+ >+ palignr m2, m3, m1, 4 >+ movu [r0], m2 >+ palignr m2, m0, m3, 4 >+ movu [r0 + 16], m2 >+ palignr m2, m3, m1, 5 >+ movu [r0 + r1], m2 >+ palignr m2, m0, m3, 5 >+ movu [r0 + r1 + 16], m2 >+ palignr m2, m3, m1, 6 >+ movu [r0 + r1 * 2], m2 >+ palignr m2, m0, m3, 6 >+ movu [r0 + r1 * 2 + 16], m2 >+ palignr m2, m3, m1, 7 >+ movu [r0 + r3], m2 >+ palignr m2, m0, m3, 7 >+ movu [r0 + r3 + 16], m2 >+ >+ lea r0, [r0 + r1 * 4] >+ >+ palignr m2, m3, m1, 8 >+ movu [r0], m2 >+ palignr m2, m0, m3, 8 >+ movu [r0 + 16], m2 >+ palignr m2, m3, m1, 9 >+ movu [r0 + r1], m2 >+ palignr m2, m0, m3, 9 >+ movu [r0 + r1 + 16], m2 >+ palignr m2, m3, m1, 10 >+ movu [r0 + r1 * 2], m2 >+ palignr m2, m0, m3, 10 >+ movu [r0 + r1 * 2 + 16], m2 >+ palignr m2, m3, m1, 11 >+ movu [r0 + r3], m2 >+ palignr m2, m0, m3, 11 >+ movu [r0 + r3 + 16], m2 >+ >+ lea r0, [r0 + r1 * 4] >+ >+ palignr m2, m3, m1, 12 >+ movu 
[r0], m2 >+ palignr m2, m0, m3, 12 >+ movu [r0 + 16], m2 >+ palignr m2, m3, m1, 13 >+ movu [r0 + r1], m2 >+ palignr m2, m0, m3, 13 >+ movu [r0 + r1 + 16], m2 >+ palignr m2, m3, m1, 14 >+ movu [r0 + r1 * 2], m2 >+ palignr m2, m0, m3, 14 >+ movu [r0 + r1 * 2 + 16], m2 >+ palignr m2, m3, m1, 15 >+ movu [r0 + r3], m2 >+ palignr m2, m0, m3, 15 >+ movu [r0 + r3 + 16], m2 >+ RET >+ > ;----------------------------------------------------------------------------- > ; void all_angs_pred_4x4(pixel *dest, pixel *above0, pixel *left0, pixel > *above1, pixel *left1, bool bLuma) > ;----------------------------------------------------------------------------- >_______________________________________________ >x265-devel mailing list >[email protected] >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
