I can't take this or the 16x16 patch because the 4x4 patch still causes
crashes.  You'll need to fix the first one and then resubmit these all together.

On Nov 19, 2013, at 12:23 AM, [email protected] wrote:

> # HG changeset patch
> # User Murugan Vairavel <[email protected]>
> # Date 1384842189 -19800
> #      Tue Nov 19 11:53:09 2013 +0530
> # Node ID 3a94cc365533bf7def255dc5b28e6a6a1d1bfa50
> # Parent  f6a050b79cfa400aa432f49ee8a4c2b9f20cf930
> asm: code for transpose_8x8 routine
> 
> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp    Tue Nov 19 11:25:00 2013 +0530
> +++ b/source/common/x86/asm-primitives.cpp    Tue Nov 19 11:53:09 2013 +0530
> @@ -546,6 +546,7 @@
>         p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
>         p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
>         p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
> +        p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
>     }
>     if (cpuMask & X265_CPU_SSSE3)
>     {
> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel-a.asm
> --- a/source/common/x86/pixel-a.asm   Tue Nov 19 11:25:00 2013 +0530
> +++ b/source/common/x86/pixel-a.asm   Tue Nov 19 11:53:09 2013 +0530
> @@ -8359,3 +8359,45 @@
>     movu         [r0],    m0
> 
>     RET
> +
> +;-----------------------------------------------------------------
> +; void transpose_8x8(pixel *dst, pixel *src, intptr_t stride)
> +;-----------------------------------------------------------------
> +INIT_XMM sse2
> +cglobal transpose8, 3, 3, 8, dest, src, stride
> +
> +    movh         m0,    [r1]
> +    movh         m1,    [r1 + r2]
> +    movh         m2,    [r1 + 2 * r2]
> +    lea          r1,    [r1 + 2 * r2]
> +    movh         m3,    [r1 + r2]
> +    movh         m4,    [r1 + 2 * r2]
> +    lea          r1,    [r1 + 2 * r2]
> +    movh         m5,    [r1 + r2]
> +    movh         m6,    [r1 + 2 * r2]
> +    lea          r1,    [r1 + 2 * r2]
> +    movh         m7,    [r1 + r2]
> +
> +    punpcklbw    m0,    m1
> +    punpcklbw    m2,    m3
> +    punpcklbw    m4,    m5
> +    punpcklbw    m6,    m7
> +    movu         m1,    m0
> +    punpcklwd    m0,    m2
> +    punpckhwd    m1,    m2
> +    movu         m5,    m4
> +    punpcklwd    m4,    m6
> +    punpckhwd    m5,    m6
> +    movu         m2,    m0
> +    punpckldq    m0,    m4
> +    punpckhdq    m2,    m4
> +    movu         m3,    m1
> +    punpckldq    m1,    m5
> +    punpckhdq    m3,    m5
> +
> +    movu         [r0],         m0
> +    movu         [r0 + 16],    m2
> +    movu         [r0 + 32],    m1
> +    movu         [r0 + 48],    m3
> +
> +    RET
> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel.h
> --- a/source/common/x86/pixel.h       Tue Nov 19 11:25:00 2013 +0530
> +++ b/source/common/x86/pixel.h       Tue Nov 19 11:53:09 2013 +0530
> @@ -366,5 +366,6 @@
> void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
> intptr_t stride);
> void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
> intptr_t stride);
> void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
> +void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride);
> 
> #endif // ifndef X265_I386_PIXEL_H
> _______________________________________________
> x265-devel mailing list
> [email protected]
> https://mailman.videolan.org/listinfo/x265-devel

Attachment: signature.asc
Description: Message signed with OpenPGP using GPGMail

_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to