I have reviewed his patches; after they are pushed, I may send a new version.
At 2014-09-04 12:47:24,"Deepthi Nandakumar" <deep...@multicorewareinc.com> wrote: Min, Praveen has sent a number of patches on changing the entire interface for quant such that the coefficients are now 16-bit instead of 32-bit. Your patches still assume they are 32-bit? Can you review all his patches (8-10 patches) and see if we're moving in the right direction? Thanks, Deepthi On Thu, Sep 4, 2014 at 5:07 AM, Min Chen <chenm...@163.com> wrote: # HG changeset patch # User Min Chen <chenm...@163.com> # Date 1409787419 25200 # Node ID 4ca9e972f48cb4530ca7181ad7cec351568a99b3 # Parent 94bd00d1af5d8c5f6f26f97c50a727588a860714 asm: optimize nquant by PSIGND, improve 13k cycles -> 11k cycles diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/dct.cpp --- a/source/common/dct.cpp Wed Sep 03 16:36:44 2014 -0700 +++ b/source/common/dct.cpp Wed Sep 03 16:36:59 2014 -0700 @@ -801,6 +801,10 @@ { uint32_t numSig = 0; + X265_CHECK((numCoeff % 16) == 0, "number of quant coeff is not multiple of 4x4\n"); + X265_CHECK((uint32_t)add < ((uint32_t)1 << qBits), "2 ^ qBits less than add\n"); + X265_CHECK(((intptr_t)quantCoeff & 15) == 0, "quantCoeff buffer not aligned\n"); + for (int blockpos = 0; blockpos < numCoeff; blockpos++) { int level = coef[blockpos]; diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:44 2014 -0700 +++ b/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:59 2014 -0700 @@ -941,55 +941,47 @@ ; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); ;----------------------------------------------------------------------------- INIT_XMM sse4 -cglobal nquant, 4,5,8 +cglobal nquant, 3,5,8 movd m6, r4m mov r4d, r5m pxor m7, m7 ; m7 = numZero - movd m5, r3d ; m5 = qbits + movd m5, r3m ; m5 = qbits pshufd m6, m6, 0 ; m6 = add mov r3d, r4d ; r3 = numCoeff shr r4d, 3 + .loop: movu m0, [r0] ; m0 = level movu m1, [r0 + 16] ; m1 = level - movu m2, [r1] ; m2 
= qcoeff - movu m3, [r1 + 16] ; m3 = qcoeff + + pabsd m2, m0 + pmulld m2, [r1] ; m4 = tmpLevel1 + paddd m2, m6 + psrad m2, m5 ; m4 = level1 + psignd m2, m0 ; restore sign + + pabsd m3, m1 + pmulld m3, [r1 + 16] ; m4 = tmpLevel1 + paddd m3, m6 + psrad m3, m5 ; m4 = level1 + psignd m3, m1 ; restore sign add r0, 32 add r1, 32 - pxor m4, m4 - pcmpgtd m4, m0 ; m4 = sign - pabsd m0, m0 - pmulld m0, m2 ; m0 = tmpLevel1 - paddd m0, m6 - psrad m0, m5 ; m0 = level1 - pxor m0, m4 - psubd m0, m4 - - pxor m4, m4 - pcmpgtd m4, m1 ; m4 = sign - pabsd m1, m1 - pmulld m1, m3 ; m1 = tmpLevel1 - paddd m1, m6 - psrad m1, m5 ; m1 = level1 - pxor m1, m4 - psubd m1, m4 - - packssdw m0, m0 - packssdw m1, m1 - pmovsxwd m0, m0 + packssdw m2, m3 + pmovsxwd m0, m2 + movhlps m1, m2 pmovsxwd m1, m1 - movu [r2], m0 + movu [r2 ], m0 movu [r2 + 16], m1 add r2, 32 + + pxor m4, m4 + pcmpeqw m2, m4 + psubw m7, m2 + dec r4d - - packssdw m0, m1 - pxor m4, m4 - pcmpeqw m0, m4 - psubw m7, m0 - jnz .loop packuswb m7, m7 @@ -997,10 +989,8 @@ mov eax, r3d movd r4d, m7 sub eax, r4d ; numSig - RET - ;----------------------------------------------------------------------------- ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift) ;----------------------------------------------------------------------------- _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel