I can't take this or the 16x16 patch because the 4x4 patch still causes crashes. You'll need to fix the 4x4 patch first and then resubmit them all together.
On Nov 19, 2013, at 12:23 AM, [email protected] wrote: > # HG changeset patch > # User Murugan Vairavel <[email protected]> > # Date 1384842189 -19800 > # Tue Nov 19 11:53:09 2013 +0530 > # Node ID 3a94cc365533bf7def255dc5b28e6a6a1d1bfa50 > # Parent f6a050b79cfa400aa432f49ee8a4c2b9f20cf930 > asm: code for transpose_8x8 routine > > diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Tue Nov 19 11:25:00 2013 +0530 > +++ b/source/common/x86/asm-primitives.cpp Tue Nov 19 11:53:09 2013 +0530 > @@ -546,6 +546,7 @@ > p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2; > p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2; > p.transpose[BLOCK_4x4] = x265_transpose4_sse2; > + p.transpose[BLOCK_8x8] = x265_transpose8_sse2; > } > if (cpuMask & X265_CPU_SSSE3) > { > diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel-a.asm > --- a/source/common/x86/pixel-a.asm Tue Nov 19 11:25:00 2013 +0530 > +++ b/source/common/x86/pixel-a.asm Tue Nov 19 11:53:09 2013 +0530 > @@ -8359,3 +8359,45 @@ > movu [r0], m0 > > RET > + > +;----------------------------------------------------------------- > +; void transpose_8x8(pixel *dst, pixel *src, intptr_t stride) > +;----------------------------------------------------------------- > +INIT_XMM sse2 > +cglobal transpose8, 3, 3, 8, dest, src, stride > + > + movh m0, [r1] > + movh m1, [r1 + r2] > + movh m2, [r1 + 2 * r2] > + lea r1, [r1 + 2 * r2] > + movh m3, [r1 + r2] > + movh m4, [r1 + 2 * r2] > + lea r1, [r1 + 2 * r2] > + movh m5, [r1 + r2] > + movh m6, [r1 + 2 * r2] > + lea r1, [r1 + 2 * r2] > + movh m7, [r1 + r2] > + > + punpcklbw m0, m1 > + punpcklbw m2, m3 > + punpcklbw m4, m5 > + punpcklbw m6, m7 > + movu m1, m0 > + punpcklwd m0, m2 > + punpckhwd m1, m2 > + movu m5, m4 > + punpcklwd m4, m6 > + punpckhwd m5, m6 > + movu m2, m0 > + punpckldq m0, m4 > + punpckhdq m2, m4 > + movu m3, m1 > + punpckldq m1, m5 > + punpckhdq m3, m5 > + > + movu [r0], m0 > + movu [r0 + 16], m2 
> + movu [r0 + 32], m1 > + movu [r0 + 48], m3 > + > + RET > diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel.h > --- a/source/common/x86/pixel.h Tue Nov 19 11:25:00 2013 +0530 > +++ b/source/common/x86/pixel.h Tue Nov 19 11:53:09 2013 +0530 > @@ -366,5 +366,6 @@ > void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, > intptr_t stride); > void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, > intptr_t stride); > void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride); > +void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride); > > #endif // ifndef X265_I386_PIXEL_H > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel
signature.asc
Description: Message signed with OpenPGP using GPGMail
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
