# HG changeset patch
# User Yuvaraj Venkatesh <yuva...@multicorewareinc.com>
# Date 1386142683 -19800
#      Wed Dec 04 13:08:03 2013 +0530
# Node ID 546523046d990119dc910b87ebe3f4c8ab25f236
# Parent  6a41cb559feb98056d30482651f5a83f5e326300
asm: 16bpp support for sa8d - 24x32 and 48x64

diff -r 6a41cb559feb -r 546523046d99 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Wed Dec 04 12:39:42 2013 +0530 +++ b/source/common/x86/asm-primitives.cpp Wed Dec 04 13:08:03 2013 +0530 @@ -516,6 +516,8 @@ p.sa8d_inter[LUMA_16x12] = x265_pixel_satd_16x12_sse2; p.sa8d_inter[LUMA_16x32] = x265_pixel_sa8d_16x32_sse2; p.sa8d_inter[LUMA_16x64] = x265_pixel_sa8d_16x64_sse2; + p.sa8d_inter[LUMA_24x32] = x265_pixel_sa8d_24x32_sse2; + p.sa8d_inter[LUMA_48x64] = x265_pixel_sa8d_48x64_sse2; p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_sse2; p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_sse2; diff -r 6a41cb559feb -r 546523046d99 source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asm Wed Dec 04 12:39:42 2013 +0530 +++ b/source/common/x86/pixel-a.asm Wed Dec 04 13:08:03 2013 +0530 @@ -2683,38 +2683,38 @@ mova m7, [hmul_8p] %endif SA8D_8x8 - add r0, 8 - add r2, 8 + add r0, 8*SIZEOF_PIXEL + add r2, 8*SIZEOF_PIXEL SA8D_8x8 - add r0, 8 - add r2, 8 + add r0, 8*SIZEOF_PIXEL + add r2, 8*SIZEOF_PIXEL SA8D_8x8 lea r0, [r0 + r1*8] lea r2, [r2 + r3*8] SA8D_8x8 - sub r0, 8 - sub r2, 8 + sub r0, 8*SIZEOF_PIXEL + sub r2, 8*SIZEOF_PIXEL SA8D_8x8 - sub r0, 8 - sub r2, 8 + sub r0, 8*SIZEOF_PIXEL + sub r2, 8*SIZEOF_PIXEL SA8D_8x8 lea r0, [r0 + r1*8] lea r2, [r2 + r3*8] SA8D_8x8 - add r0, 8 - add r2, 8 + add r0, 8*SIZEOF_PIXEL + add r2, 8*SIZEOF_PIXEL SA8D_8x8 - add r0, 8 - add r2, 8 + add r0, 8*SIZEOF_PIXEL + add r2, 8*SIZEOF_PIXEL SA8D_8x8 lea r0, [r0 + r1*8] lea r2, [r2 + r3*8] SA8D_8x8 - sub r0, 8 - sub r2, 8 + sub r0, 8*SIZEOF_PIXEL + sub r2, 8*SIZEOF_PIXEL SA8D_8x8 - sub r0, 8 - sub r2, 8 + sub r0, 8*SIZEOF_PIXEL + sub r2, 8*SIZEOF_PIXEL SA8D_8x8 movd eax, m12 RET @@ -2909,8 +2909,8 @@ lea r5, [8*r3] sub r2, r4 sub r0, r5 - add r2, 16 - add r0, 16 + add r2, 16*SIZEOF_PIXEL + add r0, 16*SIZEOF_PIXEL lea r4, [3*r1] lea r5, [3*r3] SA8D_16x16 @@ -2918,8 +2918,8 @@ lea r5, [8*r3] sub r2, r4 sub r0, r5 - add r2, 16 - add r0, 16 + add r2, 
16*SIZEOF_PIXEL + add r0, 16*SIZEOF_PIXEL lea r4, [3*r1] lea r5, [3*r3] SA8D_16x16 @@ -2930,8 +2930,8 @@ lea r5, [8*r3] sub r2, r4 sub r0, r5 - sub r2, 16 - sub r0, 16 + sub r2, 16*SIZEOF_PIXEL + sub r0, 16*SIZEOF_PIXEL lea r4, [3*r1] lea r5, [3*r3] SA8D_16x16 @@ -2939,8 +2939,8 @@ lea r5, [8*r3] sub r2, r4 sub r0, r5 - sub r2, 16 - sub r0, 16 + sub r2, 16*SIZEOF_PIXEL + sub r0, 16*SIZEOF_PIXEL lea r4, [3*r1] lea r5, [3*r3] SA8D_16x16 @@ -2951,8 +2951,8 @@ lea r5, [8*r3] sub r2, r4 sub r0, r5 - add r2, 16 - add r0, 16 + add r2, 16*SIZEOF_PIXEL + add r0, 16*SIZEOF_PIXEL lea r4, [3*r1] lea r5, [3*r3] SA8D_16x16 @@ -2960,8 +2960,8 @@ lea r5, [8*r3] sub r2, r4 sub r0, r5 - add r2, 16 - add r0, 16 + add r2, 16*SIZEOF_PIXEL + add r0, 16*SIZEOF_PIXEL lea r4, [3*r1] lea r5, [3*r3] SA8D_16x16 @@ -2972,8 +2972,8 @@ lea r5, [8*r3] sub r2, r4 sub r0, r5 - sub r2, 16 - sub r0, 16 + sub r2, 16*SIZEOF_PIXEL + sub r0, 16*SIZEOF_PIXEL lea r4, [3*r1] lea r5, [3*r3] SA8D_16x16 @@ -2981,8 +2981,8 @@ lea r5, [8*r3] sub r2, r4 sub r0, r5 - sub r2, 16 - sub r0, 16 + sub r2, 16*SIZEOF_PIXEL + sub r0, 16*SIZEOF_PIXEL lea r4, [3*r1] lea r5, [3*r3] SA8D_16x16 @@ -4577,6 +4577,9 @@ lea r4, [r1 + 2*r1] lea r5, [r3 + 2*r3] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [rsp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4590,8 +4593,10 @@ SA8D_INTER mova [esp+48], m0 call pixel_sa8d_8x8_internal2 - paddusw m0, [esp+48] + SA8D_INTER +%if HIGH_BIT_DEPTH == 0 HADDUW m0, m1 +%endif movd r4d, m0 add r4d, 1 shr r4d, 1 @@ -4603,6 +4608,9 @@ add r2, 16*SIZEOF_PIXEL lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4624,6 +4632,9 @@ add r2, 32*SIZEOF_PIXEL lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4650,6 +4661,9 @@ lea r4, [r1 + 2*r1] call 
pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4671,6 +4685,9 @@ add r2, 16*SIZEOF_PIXEL lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4692,6 +4709,9 @@ add r2, 32*SIZEOF_PIXEL lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4718,6 +4738,9 @@ lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4739,6 +4762,9 @@ add r2, 16*SIZEOF_PIXEL lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4760,6 +4786,9 @@ add r2, 32*SIZEOF_PIXEL lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4786,6 +4815,9 @@ lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4807,6 +4839,9 @@ add r2, 16*SIZEOF_PIXEL lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4828,6 +4863,9 @@ add r2, 32*SIZEOF_PIXEL lea r4, [r1 + 2*r1] call pixel_sa8d_8x8_internal2 +%if HIGH_BIT_DEPTH + HADDUW m0, m1 +%endif mova [esp+48], m0 call pixel_sa8d_8x8_internal2 SA8D_INTER @@ -4841,8 +4879,10 @@ SA8D_INTER mova [esp+64-mmsize], m0 call pixel_sa8d_8x8_internal2 - paddusw m0, [esp+48] + SA8D_INTER +%if HIGH_BIT_DEPTH == 0 HADDUW m0, m1 +%endif movd r4d, m0 add r4d, 1 shr r4d, 1 _______________________________________________ x265-devel mailing list x265-devel@videolan.org 
https://mailman.videolan.org/listinfo/x265-devel