Min, Praveen has sent a number of patches that change the entire quant interface so that the coefficients are now 16-bit instead of 32-bit. Do your patches still assume they are 32-bit?
Can you review all his patches (8-10 patches) and see if we're moving in the right direction? Thanks, Deepthi On Thu, Sep 4, 2014 at 5:07 AM, Min Chen <chenm...@163.com> wrote: > # HG changeset patch > # User Min Chen <chenm...@163.com> > # Date 1409787419 25200 > # Node ID 4ca9e972f48cb4530ca7181ad7cec351568a99b3 > # Parent 94bd00d1af5d8c5f6f26f97c50a727588a860714 > asm: optimize nquant by PSIGND, improve 13k cycles -> 11k cycles > > diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/dct.cpp > --- a/source/common/dct.cpp Wed Sep 03 16:36:44 2014 -0700 > +++ b/source/common/dct.cpp Wed Sep 03 16:36:59 2014 -0700 > @@ -801,6 +801,10 @@ > { > uint32_t numSig = 0; > > + X265_CHECK((numCoeff % 16) == 0, "number of quant coeff is not > multiple of 4x4\n"); > + X265_CHECK((uint32_t)add < ((uint32_t)1 << qBits), "2 ^ qBits less > than add\n"); > + X265_CHECK(((intptr_t)quantCoeff & 15) == 0, "quantCoeff buffer not > aligned\n"); > + > for (int blockpos = 0; blockpos < numCoeff; blockpos++) > { > int level = coef[blockpos]; > diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/x86/pixel-util8.asm > --- a/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:44 2014 -0700 > +++ b/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:59 2014 -0700 > @@ -941,55 +941,47 @@ > ; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int > qBits, int add, int numCoeff); > > > ;----------------------------------------------------------------------------- > INIT_XMM sse4 > -cglobal nquant, 4,5,8 > +cglobal nquant, 3,5,8 > movd m6, r4m > mov r4d, r5m > pxor m7, m7 ; m7 = numZero > - movd m5, r3d ; m5 = qbits > + movd m5, r3m ; m5 = qbits > pshufd m6, m6, 0 ; m6 = add > mov r3d, r4d ; r3 = numCoeff > shr r4d, 3 > + > .loop: > movu m0, [r0] ; m0 = level > movu m1, [r0 + 16] ; m1 = level > - movu m2, [r1] ; m2 = qcoeff > - movu m3, [r1 + 16] ; m3 = qcoeff > + > + pabsd m2, m0 > + pmulld m2, [r1] ; m4 = tmpLevel1 > + paddd m2, m6 > + psrad m2, m5 ; m4 = level1 > + psignd m2, m0 ; 
restore sign > + > + pabsd m3, m1 > + pmulld m3, [r1 + 16] ; m4 = tmpLevel1 > + paddd m3, m6 > + psrad m3, m5 ; m4 = level1 > + psignd m3, m1 ; restore sign > add r0, 32 > add r1, 32 > > - pxor m4, m4 > - pcmpgtd m4, m0 ; m4 = sign > - pabsd m0, m0 > - pmulld m0, m2 ; m0 = tmpLevel1 > - paddd m0, m6 > - psrad m0, m5 ; m0 = level1 > - pxor m0, m4 > - psubd m0, m4 > - > - pxor m4, m4 > - pcmpgtd m4, m1 ; m4 = sign > - pabsd m1, m1 > - pmulld m1, m3 ; m1 = tmpLevel1 > - paddd m1, m6 > - psrad m1, m5 ; m1 = level1 > - pxor m1, m4 > - psubd m1, m4 > - > - packssdw m0, m0 > - packssdw m1, m1 > - pmovsxwd m0, m0 > + packssdw m2, m3 > + pmovsxwd m0, m2 > + movhlps m1, m2 > pmovsxwd m1, m1 > > - movu [r2], m0 > + movu [r2 ], m0 > movu [r2 + 16], m1 > add r2, 32 > + > + pxor m4, m4 > + pcmpeqw m2, m4 > + psubw m7, m2 > + > dec r4d > - > - packssdw m0, m1 > - pxor m4, m4 > - pcmpeqw m0, m4 > - psubw m7, m0 > - > jnz .loop > > packuswb m7, m7 > @@ -997,10 +989,8 @@ > mov eax, r3d > movd r4d, m7 > sub eax, r4d ; numSig > - > RET > > - > > > ;----------------------------------------------------------------------------- > ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, > int scale, int shift) > > > ;----------------------------------------------------------------------------- > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel