# HG changeset patch
# User Dnyaneshwar G <dnyanesh...@multicorewareinc.com>
# Date 1389793733 -19800
#      Wed Jan 15 19:18:53 2014 +0530
# Node ID fec35802092cec9416924eabc072ee0993df34e1
# Parent  27c2dac98a3c34e6e1b03960d296ef8f2295fefd
asm: code for intra_pred[BLOCK_32x32] mode 2 and 34

diff -r 27c2dac98a3c -r fec35802092c source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Dec 17 13:14:20 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Jan 15 19:18:53 2014 +0530
@@ -554,6 +554,9 @@
 #define SETUP_INTRA_ANG16(mode, fno, cpu) \
     p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## cpu;
 
+#define SETUP_INTRA_ANG32(mode, fno, cpu) \
+    p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu; // 32x32 counterpart of SETUP_INTRA_ANG16: binds slot `mode` to x265_intra_pred_ang32_<fno>_<cpu>
+
 namespace x265 {
 // private x265 namespace
 void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
@@ -896,6 +899,8 @@
         SETUP_INTRA_ANG8(34, 2, ssse3);
         SETUP_INTRA_ANG16(2, 2, ssse3);
         SETUP_INTRA_ANG16(34, 2, ssse3);
+        SETUP_INTRA_ANG32(2, 2, ssse3); // mode 2 -> x265_intra_pred_ang32_2_ssse3
+        SETUP_INTRA_ANG32(34, 2, ssse3); // mode 34 shares the same routine (fno is 2 for both registrations)
 
         p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
         p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
diff -r 27c2dac98a3c -r fec35802092c source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Tue Dec 17 13:14:20 2013 +0530
+++ b/source/common/x86/intrapred.h Wed Jan 15 19:18:53 2014 +0530
@@ -95,6 +95,25 @@
 DECL_ANG(16, 18, sse4);
 DECL_ANG(16, 26, sse4);
 
+DECL_ANG(32, 2, ssse3); // presumably declares x265_intra_pred_ang32_2_ssse3 (DECL_ANG's definition is outside this hunk); the only 32x32 routine registered by this patch
+DECL_ANG(32, 3, sse4); // NOTE(review): no routine matching this or the sse4 entries below is added or registered in the visible part of this patch
+DECL_ANG(32, 4, sse4);
+DECL_ANG(32, 5, sse4);
+DECL_ANG(32, 6, sse4);
+DECL_ANG(32, 7, sse4);
+DECL_ANG(32, 8, sse4);
+DECL_ANG(32, 9, sse4);
+DECL_ANG(32, 10, sse4);
+DECL_ANG(32, 11, sse4);
+DECL_ANG(32, 12, sse4);
+DECL_ANG(32, 13, sse4);
+DECL_ANG(32, 14, sse4);
+DECL_ANG(32, 15, sse4);
+DECL_ANG(32, 16, sse4);
+DECL_ANG(32, 17, sse4);
+DECL_ANG(32, 18, sse4);
+DECL_ANG(32, 26, sse4);
+
 #undef DECL_ANG
 void x265_all_angs_pred_4x4_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);
 void x265_all_angs_pred_8x8_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);
diff -r 27c2dac98a3c -r fec35802092c source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Tue Dec 17 13:14:20 2013 +0530
+++ 
b/source/common/x86/intrapred8.asm Wed Jan 15 19:18:53 2014 +0530
@@ -1182,6 +1182,168 @@
     movu        [r0 + r1], m2
     RET
 
+
+;---------------------------------------------------------------------------------------------------------------
+; void intraPredAng32(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
+;---------------------------------------------------------------------------------------------------------------
+INIT_XMM ssse3
+cglobal intra_pred_ang32_2, 3,4,4               ; x86inc: 3 args loaded into registers, 4 GPRs (r0-r3), 4 XMM regs (m0-m3)
+    cmp         r4m, byte 34                    ; 5th argument (dirMode in the prototype above) == 34 ?
+    cmove       r2, r3mp                        ; if so, read from the 4th argument (refAbove) instead of refLeft; below, ref = r2
+    movu        m0, [r2 + 2]                    ; m0 = ref bytes 2..17
+    movu        m1, [r2 + 18]                   ; m1 = ref bytes 18..33
+    movu        m3, [r2 + 34]                   ; m3 = ref bytes 34..49
+
+    lea         r3, [r1 * 3]                    ; r3 = 3 * dstStride (r3 is free: its argument was consumed by the cmove)
+
+    movu        [r0], m0                        ; row 0, bytes 0..15  = ref[2..17]
+    movu        [r0 + 16], m1                   ; row 0, bytes 16..31 = ref[18..33]
+    palignr     m2, m1, m0, 1                   ; m2 = 16 bytes starting 1 byte into m1:m0 = ref[3..18]
+    movu        [r0 + r1], m2                   ; row 1, left half
+    palignr     m2, m3, m1, 1                   ; same shift applied to m3:m1 = ref[19..34]
+    movu        [r0 + r1 + 16], m2              ; row 1, right half
+    palignr     m2, m1, m0, 2
+    movu        [r0 + r1 * 2], m2               ; row 2 (each row starts one byte further into ref)
+    palignr     m2, m3, m1, 2
+    movu        [r0 + r1 * 2 + 16], m2
+    palignr     m2, m1, m0, 3
+    movu        [r0 + r3], m2                   ; row 3
+    palignr     m2, m3, m1, 3
+    movu        [r0 + r3 + 16], m2
+
+    lea         r0, [r0 + r1 * 4]               ; advance dst by 4 rows
+
+    palignr     m2, m1, m0, 4
+    movu        [r0], m2                        ; row 4
+    palignr     m2, m3, m1, 4
+    movu        [r0 + 16], m2
+    palignr     m2, m1, m0, 5
+    movu        [r0 + r1], m2                   ; row 5
+    palignr     m2, m3, m1, 5
+    movu        [r0 + r1 + 16], m2
+    palignr     m2, m1, m0, 6
+    movu        [r0 + r1 * 2], m2               ; row 6
+    palignr     m2, m3, m1, 6
+    movu        [r0 + r1 * 2 + 16], m2
+    palignr     m2, m1, m0, 7
+    movu        [r0 + r3], m2                   ; row 7
+    palignr     m2, m3, m1, 7
+    movu        [r0 + r3 + 16], m2
+
+    lea         r0, [r0 + r1 * 4]               ; advance dst by 4 rows
+
+    palignr     m2, m1, m0, 8
+    movu        [r0], m2                        ; row 8
+    palignr     m2, m3, m1, 8
+    movu        [r0 + 16], m2
+    palignr     m2, m1, m0, 9
+    movu        [r0 + r1], m2                   ; row 9
+    palignr     m2, m3, m1, 9
+    movu        [r0 + r1 + 16], m2
+    palignr     m2, m1, m0, 10
+    movu        [r0 + r1 * 2], m2               ; row 10
+    palignr     m2, m3, m1, 10
+    movu        [r0 + r1 * 2 + 16], m2
+    palignr     m2, m1, m0, 11
+    movu        [r0 + r3], m2                   ; row 11
+    palignr     m2, m3, m1, 11
+    movu        [r0 + r3 + 16], m2
+
+    lea         r0, [r0 + r1 * 4]               ; advance dst by 4 rows
+
+    palignr     m2, m1, m0, 12
+    movu        [r0], m2                        ; row 12
+    palignr     m2, m3, m1, 12
+    movu        [r0 + 16], m2
+    palignr     m2, m1, m0, 13
+    movu        [r0 + r1], m2                   ; row 13
+    palignr     m2, m3, m1, 13
+    movu        [r0 + r1 + 16], m2
+    palignr     m2, m1, m0, 14
+    movu        [r0 + r1 * 2], m2               ; row 14
+    palignr     m2, m3, m1, 14
+    movu        [r0 + r1 * 2 + 16], m2
+    palignr     m2, m1, m0, 15
+    movu        [r0 + r3], m2                   ; row 15
+    palignr     m2, m3, m1, 15
+    movu        [r0 + r3 + 16], m2
+
+    lea         r0, [r0 + r1 * 4]               ; advance dst by 4 rows
+
+    movu        [r0], m1                        ; row 16, left half = ref[18..33]: the window has moved a full register
+    movu        m0, [r2 + 50]                   ; m0 is reused: ref bytes 50..65, the right-half source for rows 17..31
+    movu        [r0 + 16], m3                   ; row 16, right half = ref[34..49]
+    palignr     m2, m3, m1, 1                   ; left halves now come from m3:m1
+    movu        [r0 + r1], m2                   ; row 17
+    palignr     m2, m0, m3, 1                   ; right halves now come from m0:m3
+    movu        [r0 + r1 + 16], m2
+    palignr     m2, m3, m1, 2
+    movu        [r0 + r1 * 2], m2               ; row 18
+    palignr     m2, m0, m3, 2
+    movu        [r0 + r1 * 2 + 16], m2
+    palignr     m2, m3, m1, 3
+    movu        [r0 + r3], m2                   ; row 19
+    palignr     m2, m0, m3, 3
+    movu        [r0 + r3 + 16], m2
+
+    lea         r0, [r0 + r1 * 4]               ; advance dst by 4 rows
+
+    palignr     m2, m3, m1, 4
+    movu        [r0], m2                        ; row 20
+    palignr     m2, m0, m3, 4
+    movu        [r0 + 16], m2
+    palignr     m2, m3, m1, 5
+    movu        [r0 + r1], m2                   ; row 21
+    palignr     m2, m0, m3, 5
+    movu        [r0 + r1 + 16], m2
+    palignr     m2, m3, m1, 6
+    movu        [r0 + r1 * 2], m2               ; row 22
+    palignr     m2, m0, m3, 6
+    movu        [r0 + r1 * 2 + 16], m2
+    palignr     m2, m3, m1, 7
+    movu        [r0 + r3], m2                   ; row 23
+    palignr     m2, m0, m3, 7
+    movu        [r0 + r3 + 16], m2
+
+    lea         r0, [r0 + r1 * 4]               ; advance dst by 4 rows
+
+    palignr     m2, m3, m1, 8
+    movu        [r0], m2                        ; row 24
+    palignr     m2, m0, m3, 8
+    movu        [r0 + 16], m2
+    palignr     m2, m3, m1, 9
+    movu        [r0 + r1], m2                   ; row 25
+    palignr     m2, m0, m3, 9
+    movu        [r0 + r1 + 16], m2
+    palignr     m2, m3, m1, 10
+    movu        [r0 + r1 * 2], m2               ; row 26
+    palignr     m2, m0, m3, 10
+    movu        [r0 + r1 * 2 + 16], m2
+    palignr     m2, m3, m1, 11
+    movu        [r0 + r3], m2                   ; row 27
+    palignr     m2, m0, m3, 11
+    movu        [r0 + r3 + 16], m2
+
+    lea         r0, [r0 + r1 * 4]               ; advance dst by 4 rows
+
+    palignr     m2, m3, m1, 12
+    movu        [r0], m2                        ; row 28
+    palignr     m2, m0, m3, 12
+    movu        [r0 + 16], m2
+    palignr     m2, m3, m1, 13
+    movu        [r0 + r1], m2                   ; row 29
+    palignr     m2, m0, m3, 13
+    movu        [r0 + r1 + 16], m2
+    palignr     m2, m3, m1, 14
+    movu        [r0 + r1 * 2], m2               ; row 30
+    palignr     m2, m0, m3, 14
+    movu        [r0 + r1 * 2 + 16], m2
+    palignr     m2, m3, m1, 15
+    movu        [r0 + r3], m2                   ; row 31, left half = ref[33..48]
+    palignr     m2, m0, m3, 15
+    movu        [r0 + r3 + 16], m2              ; row 31, right half = ref[49..64], the last reference byte used
+    RET
+
 ;-----------------------------------------------------------------------------
 ; void all_angs_pred_4x4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma)
 ;-----------------------------------------------------------------------------
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel