> For x264 code style, we need to write this as movu here.

Code style fixed, thanks.
# HG changeset patch # User Satoshi Nakagawa <[email protected]> # Date 1392457490 -32400 # Sat Feb 15 18:44:50 2014 +0900 # Node ID 5093f4b038285cdd2466fac540884b8be93d2c87 # Parent 289b4ef4ecee345f6640d1602c6caf2c5b215d2d primitives: add count_nonzero diff -r 289b4ef4ecee -r 5093f4b03828 source/Lib/TLibEncoder/TEncEntropy.cpp --- a/source/Lib/TLibEncoder/TEncEntropy.cpp Sat Feb 15 08:12:30 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Sat Feb 15 18:44:50 2014 +0900 @@ -722,18 +722,6 @@ } } -int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size) -{ - int count = 0; - - for (int i = 0; i < size; i++) - { - count += coeff[i] != 0; - } - - return count; -} - /** encode quantization matrix * \param scalingList quantization matrix information */ diff -r 289b4ef4ecee -r 5093f4b03828 source/Lib/TLibEncoder/TEncEntropy.h --- a/source/Lib/TLibEncoder/TEncEntropy.h Sat Feb 15 08:12:30 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncEntropy.h Sat Feb 15 18:44:50 2014 +0900 @@ -189,7 +189,6 @@ void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int height, TextType ttype); void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx); void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp); - static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize); }; // END CLASS DEFINITION TEncEntropy } //! \} diff -r 289b4ef4ecee -r 5093f4b03828 source/Lib/TLibEncoder/TEncSbac.cpp --- a/source/Lib/TLibEncoder/TEncSbac.cpp Sat Feb 15 08:12:30 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSbac.cpp Sat Feb 15 18:44:50 2014 +0900 @@ -36,6 +36,7 @@ */ #include "TEncSbac.h" +#include "primitives.h" namespace x265 { //! 
\ingroup TLibEncoder @@ -2105,7 +2106,7 @@ assert(width <= m_slice->getSPS()->getMaxTrSize()); // compute number of significant coefficients - uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width * height); + uint32_t numSig = primitives.count_nonzero(coeff, width * height); if (numSig == 0) return; diff -r 289b4ef4ecee -r 5093f4b03828 source/common/dct.cpp --- a/source/common/dct.cpp Sat Feb 15 08:12:30 2014 +0530 +++ b/source/common/dct.cpp Sat Feb 15 18:44:50 2014 +0900 @@ -797,6 +797,20 @@ return acSum; } + +int count_nonzero_c(const int32_t *quantCoeff, int numCoeff) +{ + assert(numCoeff > 0 && (numCoeff & 15) == 0); + + int count = 0; + + for (int i = 0; i < numCoeff; i++) + { + count += quantCoeff[i] != 0; + } + + return count; +} } // closing - anonymous file-static namespace namespace x265 { @@ -817,5 +831,6 @@ p.idct[IDCT_8x8] = idct8_c; p.idct[IDCT_16x16] = idct16_c; p.idct[IDCT_32x32] = idct32_c; + p.count_nonzero = count_nonzero_c; } } diff -r 289b4ef4ecee -r 5093f4b03828 source/common/primitives.h --- a/source/common/primitives.h Sat Feb 15 08:12:30 2014 +0530 +++ b/source/common/primitives.h Sat Feb 15 18:44:50 2014 +0900 @@ -158,6 +158,7 @@ typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos); typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift); typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift); +typedef int (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff); typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset); typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset); @@ -240,6 +241,7 @@ quant_t 
quant; dequant_scaling_t dequant_scaling; dequant_normal_t dequant_normal; + count_nonzero_t count_nonzero; calcresidual_t calcresidual[NUM_SQUARE_BLOCKS]; calcrecon_t calcrecon[NUM_SQUARE_BLOCKS]; diff -r 289b4ef4ecee -r 5093f4b03828 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Sat Feb 15 08:12:30 2014 +0530 +++ b/source/common/x86/asm-primitives.cpp Sat Feb 15 18:44:50 2014 +0900 @@ -1084,6 +1084,7 @@ p.dct[DCT_4x4] = x265_dct4_sse2; p.idct[IDCT_4x4] = x265_idct4_sse2; p.idct[IDST_4x4] = x265_idst4_sse2; + p.count_nonzero = x265_count_nonzero_sse2; } if (cpuMask & X265_CPU_SSSE3) { diff -r 289b4ef4ecee -r 5093f4b03828 source/common/x86/pixel-util.h --- a/source/common/x86/pixel-util.h Sat Feb 15 08:12:30 2014 +0530 +++ b/source/common/x86/pixel-util.h Sat Feb 15 18:44:50 2014 +0900 @@ -46,6 +46,7 @@ uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos); void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift); +int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff); void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset); void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset); diff -r 289b4ef4ecee -r 5093f4b03828 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Sat Feb 15 08:12:30 2014 +0530 +++ b/source/common/x86/pixel-util8.asm Sat Feb 15 18:44:50 2014 +0900 @@ -1194,6 +1194,37 @@ jnz .loop RET + +;----------------------------------------------------------------------------- +; int count_nonzero(const int32_t *quantCoeff, int numCoeff); +;----------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal count_nonzero, 
2,3,4 + pxor m0, m0 + pxor m1, m1 + mov r2d, r1d + shr r1d, 3 + +.loop + movu m2, [r0] ; TODO: mova + movu m3, [r0 + 16] ; TODO: mova + add r0, 32 + packssdw m2, m3 + pcmpeqw m2, m0 + psrlw m2, 15 + packsswb m2, m2 + psadbw m2, m0 + paddd m1, m2 + dec r1d + jnz .loop + + movd r1d, m1 + sub r2d, r1d + mov eax, r2d + + RET + + ;----------------------------------------------------------------------------------------------------------------------------------------------- ;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset) ;----------------------------------------------------------------------------------------------------------------------------------------------- _______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
