# HG changeset patch
# User Yuvaraj Venkatesh <yuva...@multicorewareinc.com>
# Date 1386063278 -19800
# Tue Dec 03 15:04:38 2013 +0530
# Node ID 70be1456ef76e3289d91842e0de59cfa0bf06817
# Parent 21adddaee4606b718fe96f4bb2f5aebcbdf80c2a
asm: pixel_satd - 12x16, 24x32, 48x64 for 16bpp
diff -r 21adddaee460 -r 70be1456ef76 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Tue Dec 03 11:53:32 2013 +0800 +++ b/source/common/x86/asm-primitives.cpp Tue Dec 03 15:04:38 2013 +0530 @@ -497,6 +497,9 @@ p.satd[LUMA_16x12] = x265_pixel_satd_16x12_sse2; p.satd[LUMA_16x32] = x265_pixel_satd_16x32_sse2; p.satd[LUMA_16x64] = x265_pixel_satd_16x64_sse2; + p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse2; + p.satd[LUMA_24x32] = x265_pixel_satd_24x32_sse2; + p.satd[LUMA_48x64] = x265_pixel_satd_48x64_sse2; p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_sse2; p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_sse2; diff -r 21adddaee460 -r 70be1456ef76 source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asm Tue Dec 03 11:53:32 2013 +0800 +++ b/source/common/x86/pixel-a.asm Tue Dec 03 15:04:38 2013 +0530 @@ -1502,48 +1502,48 @@ call pixel_satd_8x8_internal2 call pixel_satd_8x8_internal2 call pixel_satd_8x8_internal2 - lea r0, [r6 + 8] - lea r2, [r7 + 8] - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - lea r0, [r6 + 16] - lea r2, [r7 + 16] - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - lea r0, [r6 + 24] - lea r2, [r7 + 24] - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - lea r0, [r6 + 32] - lea r2, [r7 + 32] - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call 
pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - lea r0, [r6 + 40] - lea r2, [r7 + 40] + lea r0, [r6 + 8*SIZEOF_PIXEL] + lea r2, [r7 + 8*SIZEOF_PIXEL] + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + lea r0, [r6 + 16*SIZEOF_PIXEL] + lea r2, [r7 + 16*SIZEOF_PIXEL] + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + lea r0, [r6 + 24*SIZEOF_PIXEL] + lea r2, [r7 + 24*SIZEOF_PIXEL] + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + lea r0, [r6 + 32*SIZEOF_PIXEL] + lea r2, [r7 + 32*SIZEOF_PIXEL] + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + lea r0, [r6 + 40*SIZEOF_PIXEL] + lea r2, [r7 + 40*SIZEOF_PIXEL] call pixel_satd_8x8_internal2 call pixel_satd_8x8_internal2 call pixel_satd_8x8_internal2 @@ -1572,53 +1572,53 @@ call pixel_satd_8x8_internal2 call pixel_satd_8x8_internal2 call pixel_satd_8x8_internal2 - lea r0, [r6 + 8] + lea r0, [r6 + 8*SIZEOF_PIXEL] mov r2, [rsp] - add r2,8 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call 
pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - lea r0, [r6 + 16] + add r2,8*SIZEOF_PIXEL + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + lea r0, [r6 + 16*SIZEOF_PIXEL] mov r2, [rsp] - add r2,16 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - lea r0, [r6 + 24] + add r2,16*SIZEOF_PIXEL + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + lea r0, [r6 + 24*SIZEOF_PIXEL] mov r2, [rsp] - add r2,24 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - lea r0, [r6 + 32] + add r2,24*SIZEOF_PIXEL + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + lea r0, [r6 + 32*SIZEOF_PIXEL] mov r2, [rsp] - add r2,32 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - call pixel_satd_8x8_internal2 - lea r0, [r6 + 40] + add r2,32*SIZEOF_PIXEL + call 
pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + call pixel_satd_8x8_internal2 + lea r0, [r6 + 40*SIZEOF_PIXEL] mov r2, [rsp] - add r2,40 + add r2,40*SIZEOF_PIXEL call pixel_satd_8x8_internal2 call pixel_satd_8x8_internal2 call pixel_satd_8x8_internal2 @@ -2278,14 +2278,14 @@ lea r0, [r0 + r1*2*SIZEOF_PIXEL] lea r2, [r2 + r3*2*SIZEOF_PIXEL] SATD_4x8_SSE vertical, 1, add - lea r0, [r6 + 4] - lea r2, [r7 + 4] + lea r0, [r6 + 4*SIZEOF_PIXEL] + lea r2, [r7 + 4*SIZEOF_PIXEL] SATD_4x8_SSE vertical, 1, add lea r0, [r0 + r1*2*SIZEOF_PIXEL] lea r2, [r2 + r3*2*SIZEOF_PIXEL] SATD_4x8_SSE vertical, 1, add - lea r0, [r6 + 8] - lea r2, [r7 + 8] + lea r0, [r6 + 8*SIZEOF_PIXEL] + lea r2, [r7 + 8*SIZEOF_PIXEL] SATD_4x8_SSE vertical, 1, add lea r0, [r0 + r1*2*SIZEOF_PIXEL] lea r2, [r2 + r3*2*SIZEOF_PIXEL] @@ -2305,16 +2305,16 @@ lea r0, [r0 + r1*2*SIZEOF_PIXEL] lea r2, [r2 + r3*2*SIZEOF_PIXEL] SATD_4x8_SSE vertical, 1, add - lea r0, [r6 + 4] + lea r0, [r6 + 4*SIZEOF_PIXEL] mov r2, [rsp] - add r2, 4 + add r2, 4*SIZEOF_PIXEL SATD_4x8_SSE vertical, 1, add lea r0, [r0 + r1*2*SIZEOF_PIXEL] lea r2, [r2 + r3*2*SIZEOF_PIXEL] SATD_4x8_SSE vertical, 1, add - lea r0, [r6 + 8] + lea r0, [r6 + 8*SIZEOF_PIXEL] mov r2, [rsp] - add r2, 8 + add r2, 8*SIZEOF_PIXEL SATD_4x8_SSE vertical, 1, add lea r0, [r0 + r1*2*SIZEOF_PIXEL] lea r2, [r2 + r3*2*SIZEOF_PIXEL] @@ -2333,19 +2333,21 @@ call pixel_satd_8x8_internal call pixel_satd_8x8_internal call pixel_satd_8x8_internal - lea r0, [r6 + 8] - lea r2, [r7 + 8] - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - lea r0, [r6 + 16] - lea r2, [r7 + 16] - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - SATD_END_SSE2 m6 + SATD_ACCUM m6, m0, m7 + 
lea r0, [r6 + 8*SIZEOF_PIXEL] + lea r2, [r7 + 8*SIZEOF_PIXEL] + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + SATD_ACCUM m6, m0, m7 + lea r0, [r6 + 16*SIZEOF_PIXEL] + lea r2, [r7 + 16*SIZEOF_PIXEL] + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + SATD_END_SSE2 m6, m7 %else cglobal pixel_satd_24x32, 4,7,8,0-4 SATD_START_SSE2 m6, m7 @@ -2355,21 +2357,26 @@ call pixel_satd_8x8_internal call pixel_satd_8x8_internal call pixel_satd_8x8_internal - lea r0, [r6 + 8] +%if HIGH_BIT_DEPTH + pxor m7, m7 +%endif + SATD_ACCUM m6, m0, m7 + lea r0, [r6 + 8*SIZEOF_PIXEL] mov r2, [rsp] - add r2, 8 - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - lea r0, [r6 + 16] + add r2, 8*SIZEOF_PIXEL + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + SATD_ACCUM m6, m0, m7 + lea r0, [r6 + 16*SIZEOF_PIXEL] mov r2, [rsp] - add r2, 16 - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - call pixel_satd_8x8_internal - SATD_END_SSE2 m6 + add r2, 16*SIZEOF_PIXEL + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + call pixel_satd_8x8_internal + SATD_END_SSE2 m6, m7 %endif ;WIN64 cglobal pixel_satd_8x32, 4,6,8 _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel