On Mon, Feb 17, 2014 at 12:08 AM, Satoshi Nakagawa <[email protected]> wrote:
> # HG changeset patch > # User Satoshi Nakagawa <[email protected]> > # Date 1392617016 -32400 > # Mon Feb 17 15:03:36 2014 +0900 > # Node ID 8dc1c9646b23a0e1110bef8a10ebfe3fee5d4250 > # Parent ce96cdb390fe26aee6effa731e51303c1d9056b0 > primitives: add count_nonzero > Queued. Please add a unit test for this primitive to one of the existing test benches > > diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.cpp > --- a/source/Lib/TLibEncoder/TEncEntropy.cpp Sun Feb 16 22:47:32 2014 > -0600 > +++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Feb 17 15:03:36 2014 > +0900 > @@ -724,18 +724,6 @@ > } > } > > -int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size) > -{ > - int count = 0; > - > - for (int i = 0; i < size; i++) > - { > - count += coeff[i] != 0; > - } > - > - return count; > -} > - > /** encode quantization matrix > * \param scalingList quantization matrix information > */ > diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.h > --- a/source/Lib/TLibEncoder/TEncEntropy.h Sun Feb 16 22:47:32 2014 > -0600 > +++ b/source/Lib/TLibEncoder/TEncEntropy.h Mon Feb 17 15:03:36 2014 > +0900 > @@ -189,7 +189,6 @@ > void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int > height, TextType ttype); > void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx); > void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int > ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int > allowMergeLeft, int allowMergeUp); > - static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize); > }; // END CLASS DEFINITION TEncEntropy > } > //! \} > diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSbac.cpp > --- a/source/Lib/TLibEncoder/TEncSbac.cpp Sun Feb 16 22:47:32 2014 > -0600 > +++ b/source/Lib/TLibEncoder/TEncSbac.cpp Mon Feb 17 15:03:36 2014 > +0900 > @@ -36,6 +36,7 @@ > */ > > #include "TEncSbac.h" > +#include "primitives.h" > > namespace x265 { > //! 
\ingroup TLibEncoder > @@ -2106,7 +2107,7 @@ > assert(width <= m_slice->getSPS()->getMaxTrSize()); > > // compute number of significant coefficients > - uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width * > height); > + uint32_t numSig = primitives.count_nonzero(coeff, width * height); > > if (numSig == 0) > return; > diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSearch.cpp > --- a/source/Lib/TLibEncoder/TEncSearch.cpp Sun Feb 16 22:47:32 2014 > -0600 > +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Feb 17 15:03:36 2014 > +0900 > @@ -87,9 +87,9 @@ > const uint32_t numLayersToAllocate = > m_cfg->getQuadtreeTULog2MaxSize() - m_cfg->getQuadtreeTULog2MinSize() + 1; > for (uint32_t i = 0; i < numLayersToAllocate; ++i) > { > - delete[] m_qtTempCoeffY[i]; > - delete[] m_qtTempCoeffCb[i]; > - delete[] m_qtTempCoeffCr[i]; > + X265_FREE(m_qtTempCoeffY[i]); > + X265_FREE(m_qtTempCoeffCb[i]); > + X265_FREE(m_qtTempCoeffCr[i]); > m_qtTempTComYuv[i].destroy(); > } > } > @@ -98,9 +98,9 @@ > delete[] m_qtTempCoeffCr; > delete[] m_qtTempTrIdx; > delete[] m_qtTempTComYuv; > - delete[] m_qtTempTUCoeffY; > - delete[] m_qtTempTUCoeffCb; > - delete[] m_qtTempTUCoeffCr; > + X265_FREE(m_qtTempTUCoeffY); > + X265_FREE(m_qtTempTUCoeffCb); > + X265_FREE(m_qtTempTUCoeffCr); > for (uint32_t i = 0; i < 3; ++i) > { > delete[] m_qtTempCbf[i]; > @@ -155,19 +155,18 @@ > > for (uint32_t i = 0; i < numLayersToAllocate; ++i) > { > - m_qtTempCoeffY[i] = new TCoeff[g_maxCUWidth * g_maxCUHeight]; > - > - m_qtTempCoeffCb[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) > * (g_maxCUHeight >> m_vChromaShift)]; > - m_qtTempCoeffCr[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) > * (g_maxCUHeight >> m_vChromaShift)]; > + m_qtTempCoeffY[i] = X265_MALLOC(TCoeff, g_maxCUWidth * > g_maxCUHeight); > + m_qtTempCoeffCb[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> > m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)); > + m_qtTempCoeffCr[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> > 
m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)); > m_qtTempTComYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE, > cfg->param.internalCsp); > } > > m_sharedPredTransformSkip[0] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT]; > m_sharedPredTransformSkip[1] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT]; > m_sharedPredTransformSkip[2] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT]; > - m_qtTempTUCoeffY = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT]; > - m_qtTempTUCoeffCb = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT]; > - m_qtTempTUCoeffCr = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT]; > + m_qtTempTUCoeffY = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT); > + m_qtTempTUCoeffCb = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT); > + m_qtTempTUCoeffCr = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT); > > m_qtTempTransformSkipTComYuv.create(g_maxCUWidth, g_maxCUHeight, > cfg->param.internalCsp); > > diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/dct.cpp > --- a/source/common/dct.cpp Sun Feb 16 22:47:32 2014 -0600 > +++ b/source/common/dct.cpp Mon Feb 17 15:03:36 2014 +0900 > @@ -797,6 +797,21 @@ > > return acSum; > } > + > +int count_nonzero_c(const int32_t *quantCoeff, int numCoeff) > +{ > + assert(((intptr_t)quantCoeff & 15) == 0); > + assert(numCoeff > 0 && (numCoeff & 15) == 0); > + > + int count = 0; > + > + for (int i = 0; i < numCoeff; i++) > + { > + count += quantCoeff[i] != 0; > + } > + > + return count; > +} > } // closing - anonymous file-static namespace > > namespace x265 { > @@ -817,5 +832,6 @@ > p.idct[IDCT_8x8] = idct8_c; > p.idct[IDCT_16x16] = idct16_c; > p.idct[IDCT_32x32] = idct32_c; > + p.count_nonzero = count_nonzero_c; > } > } > diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/primitives.h > --- a/source/common/primitives.h Sun Feb 16 22:47:32 2014 -0600 > +++ b/source/common/primitives.h Mon Feb 17 15:03:36 2014 +0900 > @@ -158,6 +158,7 @@ > typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t > *deltaU, int32_t *qCoef, int qBits, int add, int 
numCoeff, int32_t* > lastPos); > typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t > *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift); > typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, > int num, int scale, int shift); > +typedef int (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff); > > typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, > intptr_t dstStride, int width, int height, int w0, int round, int shift, > int offset); > typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t > srcStride, intptr_t dstStride, int width, int height, int w0, int round, > int shift, int offset); > @@ -240,6 +241,7 @@ > quant_t quant; > dequant_scaling_t dequant_scaling; > dequant_normal_t dequant_normal; > + count_nonzero_t count_nonzero; > > calcresidual_t calcresidual[NUM_SQUARE_BLOCKS]; > calcrecon_t calcrecon[NUM_SQUARE_BLOCKS]; > diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Sun Feb 16 22:47:32 2014 > -0600 > +++ b/source/common/x86/asm-primitives.cpp Mon Feb 17 15:03:36 2014 > +0900 > @@ -1084,6 +1084,7 @@ > p.dct[DCT_4x4] = x265_dct4_sse2; > p.idct[IDCT_4x4] = x265_idct4_sse2; > p.idct[IDST_4x4] = x265_idst4_sse2; > + p.count_nonzero = x265_count_nonzero_sse2; > } > if (cpuMask & X265_CPU_SSSE3) > { > diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util.h > --- a/source/common/x86/pixel-util.h Sun Feb 16 22:47:32 2014 -0600 > +++ b/source/common/x86/pixel-util.h Mon Feb 17 15:03:36 2014 +0900 > @@ -46,6 +46,7 @@ > > uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t > *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* > lastPos); > void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, > int num, int scale, int shift); > +int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff); > > void x265_weight_pp_sse4(pixel *src, pixel 
*dst, intptr_t srcStride, > intptr_t dstStride, int width, int height, int w0, int round, int shift, > int offset); > void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, > intptr_t dstStride, int width, int height, int w0, int round, int shift, > int offset); > diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util8.asm > --- a/source/common/x86/pixel-util8.asm Sun Feb 16 22:47:32 2014 -0600 > +++ b/source/common/x86/pixel-util8.asm Mon Feb 17 15:03:36 2014 +0900 > @@ -1194,6 +1194,37 @@ > jnz .loop > RET > > + > > +;----------------------------------------------------------------------------- > +; int count_nonzero(const int32_t *quantCoeff, int numCoeff); > > +;----------------------------------------------------------------------------- > +INIT_XMM sse2 > +cglobal count_nonzero, 2,3,4 > + pxor m0, m0 > + pxor m1, m1 > + mov r2d, r1d > + shr r1d, 3 > + > +.loop > + mova m2, [r0] > + mova m3, [r0 + 16] > + add r0, 32 > + packssdw m2, m3 > + pcmpeqw m2, m0 > + psrlw m2, 15 > + packsswb m2, m2 > + psadbw m2, m0 > + paddd m1, m2 > + dec r1d > + jnz .loop > + > + movd r1d, m1 > + sub r2d, r1d > + mov eax, r2d > + > + RET > + > + > > > ;----------------------------------------------------------------------------------------------------------------------------------------------- > ;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t > dstStride, int width, int height, int w0, int round, int shift, int offset) > > > ;----------------------------------------------------------------------------------------------------------------------------------------------- > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > -- Steve Borho
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
