# HG changeset patch # User Min Chen <chenm...@163.com> # Date 1409963761 25200 # Node ID c4dd39c9ad0b96fbf520f399de41e1e9b4b77c72 # Parent 8abcfdeeea2eab2e11da59002dad42dcf16aeab8 asm: reduce number of movd in dequant_normal
diff -r 8abcfdeeea2e -r c4dd39c9ad0b source/common/dct.cpp --- a/source/common/dct.cpp Fri Sep 05 16:48:03 2014 -0700 +++ b/source/common/dct.cpp Fri Sep 05 17:36:01 2014 -0700 @@ -729,6 +729,7 @@ X265_CHECK(num <= 32 * 32, "dequant num %d too large\n", num); X265_CHECK((num % 8) == 0, "dequant num %d not multiple of 8\n", num); X265_CHECK(shift <= 10, "shift too large %d\n", shift); + X265_CHECK(((int)coef & 31) == 0, "dequant coef buffer not aligned\n"); int add, coeffQ; diff -r 8abcfdeeea2e -r c4dd39c9ad0b source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Fri Sep 05 16:48:03 2014 -0700 +++ b/source/common/x86/pixel-util8.asm Fri Sep 05 17:36:01 2014 -0700 @@ -1040,21 +1040,18 @@ ;----------------------------------------------------------------------------- INIT_XMM sse4 cglobal dequant_normal, 5,5,5 - movd m1, r3 ; m1 = word [scale] mova m2, [pw_1] %if HIGH_BIT_DEPTH cmp r3d, 32767 jle .skip - psrld m1, 2 + shr r3d, 2 sub r4d, 2 .skip: %endif movd m0, r4d ; m0 = shift - xor r3d, r3d - dec r4d + add r4d, 15 bts r3d, r4d - movd m3, r3d - punpcklwd m1, m3 + movd m1, r3d pshufd m1, m1, 0 ; m1 = dword [add scale] ; m0 = shift ; m1 = scale @@ -1071,8 +1068,8 @@ pmovsxwd m3, m3 packssdw m4, m4 pmovsxwd m4, m4 - movu [r1], m3 - movu [r1 + 16], m4 + mova [r1], m3 + mova [r1 + 16], m4 add r0, 16 add r1, 32 _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel