# HG changeset patch # User Murugan Vairavel <muru...@multicorewareinc.com> # Date 1386140597 -19800 # Wed Dec 04 12:33:17 2013 +0530 # Node ID ee1221fac033355129128ba5f847910e3ed49047 # Parent 8b73b22d90e1a0d70495e8b5f009a9c4fc37f258 asm: 10bpp code for transpose 32x32
diff -r 8b73b22d90e1 -r ee1221fac033 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Wed Dec 04 12:06:19 2013 +0530 +++ b/source/common/x86/asm-primitives.cpp Wed Dec 04 12:33:17 2013 +0530 @@ -529,6 +529,7 @@ p.transpose[BLOCK_4x4] = x265_transpose4_sse2; p.transpose[BLOCK_8x8] = x265_transpose8_sse2; p.transpose[BLOCK_16x16] = x265_transpose16_sse2; + p.transpose[BLOCK_32x32] = x265_transpose32_sse2; p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2; PIXEL_AVG(sse2); diff -r 8b73b22d90e1 -r ee1221fac033 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Wed Dec 04 12:06:19 2013 +0530 +++ b/source/common/x86/pixel-util8.asm Wed Dec 04 12:33:17 2013 +0530 @@ -1039,8 +1039,76 @@ ; void transpose_32x32(pixel *dst, pixel *src, intptr_t stride) ;----------------------------------------------------------------- INIT_XMM sse2 +%if HIGH_BIT_DEPTH +cglobal transpose32, 3, 7, 4, dest, src, stride + add r2, r2 + mov r3, r0 + mov r4, r1 + mov r5, 64 + mov r6, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 16] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 32] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 48] + mov r3, r0 + call transpose8_internal + lea r1, [r4 + 16] + lea r0, [r6 + 8 * 64] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 8 * 64 + 16] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 8 * 64 + 32] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 8 * 64 + 48] + mov r3, r0 + call transpose8_internal + lea r1, [r4 + 32] + lea r0, [r6 + 16 * 64] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 16 * 64 + 16] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 16 * 64 + 32] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 16 * 64 + 48] + mov r3, r0 + call transpose8_internal + lea r1, [r4 + 48] + lea r0, [r6 + 24 * 64] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 24 * 64 + 16] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 24 * 64 + 32] + mov r3, r0 + call transpose8_internal + lea r1, [r1 - 8 + 2 * r2] + lea r0, [r6 + 24 * 64 + 48] + mov r3, r0 + call transpose8_internal +%else cglobal transpose32, 3, 7, 8, dest, src, stride - mov r3, r0 mov r4, r1 mov r5, r0 @@ -1058,7 +1126,7 @@ lea r0, [r3 + 16 * 32 + 16] mov r5, r0 call transpose16_internal - +%endif RET ;----------------------------------------------------------------- _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel