# HG changeset patch
# User Ashok Kumar Mishra <as...@multicorewareinc.com>
# Date 1510144762 -19800
#      Wed Nov 08 18:09:22 2017 +0530
# Node ID 5bf57563452b17c48486ab73f3fcfa4ce4d639ba
# Parent  b1dfa312234ed72c3541831a15f307feaf79484d
LookaheadTLD: frameVariance is not used
diff -r b1dfa312234e -r 5bf57563452b source/common/frame.cpp --- a/source/common/frame.cpp Thu Nov 30 10:06:49 2017 +0530 +++ b/source/common/frame.cpp Wed Nov 08 18:09:22 2017 +0530 @@ -84,21 +84,16 @@ m_analysisData.interData = NULL; m_analysis2Pass.analysisFramedata = NULL; } - - if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode || !!param->bAQMotion, param->rc.qgSize)) + if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize)) { X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized"); m_numRows = (m_fencPic->m_picHeight + param->maxCUSize - 1) / param->maxCUSize; m_reconRowFlag = new ThreadSafeInteger[m_numRows]; m_reconColCount = new ThreadSafeInteger[m_numRows]; - if (quantOffsets) { - int32_t cuCount; - if (param->rc.qgSize == 8) - cuCount = m_lowres.maxBlocksInRowFullRes * m_lowres.maxBlocksInColFullRes; - else - cuCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol; + int32_t cuCount = (param->rc.qgSize == 8) ? 
m_lowres.maxBlocksInRowFullRes * m_lowres.maxBlocksInColFullRes : + m_lowres.maxBlocksInRowLowRes * m_lowres.maxBlocksInColLowRes; m_quantOffsets = new float[cuCount]; } return true; diff -r b1dfa312234e -r 5bf57563452b source/common/lowres.cpp --- a/source/common/lowres.cpp Thu Nov 30 10:06:49 2017 +0530 +++ b/source/common/lowres.cpp Wed Nov 08 18:09:22 2017 +0530 @@ -24,50 +24,58 @@ #include "picyuv.h" #include "lowres.h" #include "mv.h" - using namespace X265_NS; - -bool Lowres::create(PicYuv *origPic, int _bframes, bool bAQEnabled, uint32_t qgSize) +void TEncPicQPAdaptationLayer::create(int iWidth, int iHeight, uint32_t uiAQPartWidth, uint32_t uiAQPartHeight) +{ + m_AQPartWidth = uiAQPartWidth; + m_AQPartHeight = uiAQPartHeight; + m_NumAQPartInWidth = (iWidth + m_AQPartWidth - 1) / m_AQPartWidth; + m_NumAQPartInHeight = (iHeight + m_AQPartHeight - 1) / m_AQPartHeight; + m_dActivity = new double[m_NumAQPartInWidth * m_NumAQPartInHeight]; +} +bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize) { isLowres = true; - bframes = _bframes; + bframes = param->bframes; width = origPic->m_picWidth / 2; lines = origPic->m_picHeight / 2; lumaStride = width + 2 * origPic->m_lumaMarginX; if (lumaStride & 31) lumaStride += 32 - (lumaStride & 31); - maxBlocksInRow = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; - maxBlocksInCol = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; - maxBlocksInRowFullRes = maxBlocksInRow * 2; - maxBlocksInColFullRes = maxBlocksInCol * 2; - int cuCount = maxBlocksInRow * maxBlocksInCol; - int cuCountFullRes; - if (qgSize == 8) - cuCountFullRes = maxBlocksInRowFullRes * maxBlocksInColFullRes; - else - cuCountFullRes = cuCount; - + maxBlocksInRowLowRes = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; + maxBlocksInColLowRes = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; + maxBlocksInRowFullRes = maxBlocksInRowLowRes * 2; + maxBlocksInColFullRes = maxBlocksInColLowRes * 2; + int 
cuCountLowRes = maxBlocksInRowLowRes * maxBlocksInColLowRes; + int cuCountFullRes = (qgSize > 8) ? maxBlocksInRowLowRes * maxBlocksInColLowRes : + maxBlocksInRowFullRes * maxBlocksInColFullRes; /* rounding the width to multiple of lowres CU size */ - width = maxBlocksInRow * X265_LOWRES_CU_SIZE; - lines = maxBlocksInCol * X265_LOWRES_CU_SIZE; - + width = maxBlocksInRowLowRes * X265_LOWRES_CU_SIZE; + lines = maxBlocksInColLowRes * X265_LOWRES_CU_SIZE; size_t planesize = lumaStride * (lines + 2 * origPic->m_lumaMarginY); size_t padoffset = lumaStride * origPic->m_lumaMarginY + origPic->m_lumaMarginX; - if (bAQEnabled) + if (!!param->rc.aqMode || !!param->bAQMotion) { CHECKED_MALLOC_ZERO(qpAqOffset, double, cuCountFullRes); CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes); CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes); CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes); - CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes); if (qgSize == 8) - CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount); + CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCountLowRes); } - CHECKED_MALLOC(propagateCost, uint16_t, cuCount); - + CHECKED_MALLOC(propagateCost, uint16_t, cuCountLowRes); /* allocate lowres buffers */ CHECKED_MALLOC_ZERO(buffer[0], pixel, 4 * planesize); + m_MaxAQDepth = g_log2Size[param->maxCUSize] - g_log2Size[qgSize] + 1; + if (m_MaxAQDepth > 0) + { + m_acAQLayer = new TEncPicQPAdaptationLayer[m_MaxAQDepth]; + for (uint32_t d = 0; d < m_MaxAQDepth; d++) + { + m_acAQLayer[d].create(origPic->m_picWidth, origPic->m_picHeight, param->maxCUSize >> d, param->maxCUSize >> d); + } + } buffer[1] = buffer[0] + planesize; buffer[2] = buffer[1] + planesize; buffer[3] = buffer[2] + planesize; @@ -76,29 +84,24 @@ lowresPlane[1] = buffer[1] + padoffset; lowresPlane[2] = buffer[2] + padoffset; lowresPlane[3] = buffer[3] + padoffset; - - CHECKED_MALLOC(intraCost, int32_t, cuCount); - CHECKED_MALLOC(intraMode, uint8_t, cuCount); - + 
CHECKED_MALLOC(intraCost, int32_t, cuCountLowRes); + CHECKED_MALLOC(intraMode, uint8_t, cuCountLowRes); for (int i = 0; i < bframes + 2; i++) { for (int j = 0; j < bframes + 2; j++) { - CHECKED_MALLOC(rowSatds[i][j], int32_t, maxBlocksInCol); - CHECKED_MALLOC(lowresCosts[i][j], uint16_t, cuCount); + CHECKED_MALLOC(rowSatds[i][j], int32_t, maxBlocksInColLowRes); + CHECKED_MALLOC(lowresCosts[i][j], uint16_t, cuCountLowRes); } } - for (int i = 0; i < bframes + 1; i++) { - CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount); - CHECKED_MALLOC(lowresMvs[1][i], MV, cuCount); - CHECKED_MALLOC(lowresMvCosts[0][i], int32_t, cuCount); - CHECKED_MALLOC(lowresMvCosts[1][i], int32_t, cuCount); + CHECKED_MALLOC(lowresMvs[0][i], MV, cuCountLowRes); + CHECKED_MALLOC(lowresMvs[1][i], MV, cuCountLowRes); + CHECKED_MALLOC(lowresMvCosts[0][i], int32_t, cuCountLowRes); + CHECKED_MALLOC(lowresMvCosts[1][i], int32_t, cuCountLowRes); } - return true; - fail: return false; } @@ -130,10 +133,8 @@ X265_FREE(invQscaleFactor); X265_FREE(qpCuTreeOffset); X265_FREE(propagateCost); - X265_FREE(blockVariance); X265_FREE(invQscaleFactor8x8); } - // (re) initialize lowres state void Lowres::init(PicYuv *origPic, int poc) { diff -r b1dfa312234e -r 5bf57563452b source/common/lowres.h --- a/source/common/lowres.h Thu Nov 30 10:06:49 2017 +0530 +++ b/source/common/lowres.h Wed Nov 08 18:09:22 2017 +0530 @@ -102,7 +102,18 @@ } } }; +struct TEncPicQPAdaptationLayer +{ + uint32_t m_AQPartWidth; + uint32_t m_AQPartHeight; + uint32_t m_NumAQPartInWidth; + uint32_t m_NumAQPartInHeight; + double* m_dActivity; + double m_dAvgActivity; + void create(int width, int height, uint32_t AQPartWidth, uint32_t AQPartHeight); + void destroy(); +}; /* lowres buffers, sizes and strides */ struct Lowres : public ReferencePlanes { @@ -132,34 +143,30 @@ uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2]; int32_t* lowresMvCosts[2][X265_BFRAME_MAX + 1]; MV* lowresMvs[2][X265_BFRAME_MAX + 1]; - uint32_t maxBlocksInRow; - 
uint32_t maxBlocksInCol; + uint32_t maxBlocksInRowLowRes; + uint32_t maxBlocksInColLowRes; uint32_t maxBlocksInRowFullRes; uint32_t maxBlocksInColFullRes; - /* used for vbvLookahead */ int plannedType[X265_LOOKAHEAD_MAX + 1]; int64_t plannedSatd[X265_LOOKAHEAD_MAX + 1]; int indB; int bframes; - /* rate control / adaptive quant data */ - double* qpAqOffset; // AQ QP offset values for each 16x16 CU - double* qpCuTreeOffset; // cuTree QP offset values for each 16x16 CU + double* qpAqOffset; // AQ QP offset values for each 16x16 CU + double* qpCuTreeOffset; // cuTree QP offset values for each 16x16 CU double* qpAqMotionOffset; - int* invQscaleFactor; // qScale values for qp Aq Offsets + int* invQscaleFactor; // qScale values for qp Aq Offsets int* invQscaleFactor8x8; // temporary buffer for qg-size 8 - uint32_t* blockVariance; - uint64_t wp_ssd[3]; // This is different than SSDY, this is sum(pixel^2) - sum(pixel)^2 for entire frame + uint64_t wp_ssd[3]; // This is different than SSDY, this is sum(pixel^2) - sum(pixel)^2 for entire frame uint64_t wp_sum[3]; - uint64_t frameVariance; - + TEncPicQPAdaptationLayer* m_acAQLayer; + uint32_t m_MaxAQDepth; /* cutree intermediate data */ uint16_t* propagateCost; double weightedCostDelta[X265_BFRAME_MAX + 2]; ReferencePlanes weightedRef[X265_BFRAME_MAX + 2]; - - bool create(PicYuv *origPic, int _bframes, bool bAqEnabled, uint32_t qgSize); + bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize); void destroy(); void init(PicYuv *origPic, int poc); }; diff -r b1dfa312234e -r 5bf57563452b source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Thu Nov 30 10:06:49 2017 +0530 +++ b/source/encoder/encoder.cpp Wed Nov 08 18:09:22 2017 +0530 @@ -1009,10 +1009,9 @@ if (m_param->rc.qgSize == 8) cuCount = inFrame->m_lowres.maxBlocksInRowFullRes * inFrame->m_lowres.maxBlocksInColFullRes; else - cuCount = inFrame->m_lowres.maxBlocksInRow * inFrame->m_lowres.maxBlocksInCol; + cuCount = inFrame->m_lowres.maxBlocksInRowLowRes * 
inFrame->m_lowres.maxBlocksInColLowRes; memcpy(inFrame->m_quantOffsets, pic_in->quantOffsets, cuCount * sizeof(float)); } - if (m_pocLast == 0) m_firstPts = inFrame->m_pts; if (m_bframeDelay && m_pocLast == m_bframeDelay) diff -r b1dfa312234e -r 5bf57563452b source/encoder/sao.cpp --- a/source/encoder/sao.cpp Thu Nov 30 10:06:49 2017 +0530 +++ b/source/encoder/sao.cpp Wed Nov 08 18:09:22 2017 +0530 @@ -136,12 +136,9 @@ CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt); m_clipTable = &(m_clipTableBase[rangeExt]); - // Share with fast clip lookup table - for (int i = 0; i < rangeExt; i++) m_clipTableBase[i] = 0; - for (int i = 0; i < maxY; i++) m_clipTable[i] = (pixel)i; diff -r b1dfa312234e -r 5bf57563452b source/encoder/search.cpp --- a/source/encoder/search.cpp Thu Nov 30 10:06:49 2017 +0530 +++ b/source/encoder/search.cpp Wed Nov 08 18:09:22 2017 +0530 @@ -1951,17 +1951,14 @@ if (mvs[0].x == 0x7FFF) /* this motion search was not estimated by lookahead */ return 0; - uint32_t block_x = (cu.m_cuPelX + g_zscanToPelX[pu.puAbsPartIdx] + pu.width / 2) >> 4; uint32_t block_y = (cu.m_cuPelY + g_zscanToPelY[pu.puAbsPartIdx] + pu.height / 2) >> 4; - uint32_t idx = block_y * m_frame->m_lowres.maxBlocksInRow + block_x; - - X265_CHECK(block_x < m_frame->m_lowres.maxBlocksInRow, "block_x is too high\n"); - X265_CHECK(block_y < m_frame->m_lowres.maxBlocksInCol, "block_y is too high\n"); - + uint32_t idx = block_y * m_frame->m_lowres.maxBlocksInRowLowRes + block_x; + + X265_CHECK(block_x < m_frame->m_lowres.maxBlocksInRowLowRes, "block_x is too high\n"); + X265_CHECK(block_y < m_frame->m_lowres.maxBlocksInColLowRes, "block_y is too high\n"); return mvs[idx] << 1; /* scale up lowres mv */ } - /* Pick between the two AMVP candidates which is the best one to use as * MVP for the motion search, based on SAD cost */ int Search::selectMVP(const CUData& cu, const PredictionUnit& pu, const MV amvp[AMVP_NUM_CANDS], int list, int ref) diff -r b1dfa312234e -r 5bf57563452b 
source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Thu Nov 30 10:06:49 2017 +0530 +++ b/source/encoder/slicetype.cpp Wed Nov 08 18:09:22 2017 +0530 @@ -120,7 +120,93 @@ x265_emms(); return (uint32_t)sum_ssd; } +void LookaheadTLD::xPreanalyze(Frame* curFrame) +{ + const uint32_t iWidth = curFrame->m_fencPic->m_picWidth; + const uint32_t iHeight = curFrame->m_fencPic->m_picHeight; + const intptr_t iStride = curFrame->m_fencPic->m_stride; + for (uint32_t d = 0; d < curFrame->m_lowres.m_MaxAQDepth; d++) + { + const pixel* src = curFrame->m_fencPic->m_picOrg[0];; + TEncPicQPAdaptationLayer pcAQLayer = curFrame->m_lowres.m_acAQLayer[d]; + const uint32_t uiAQPartWidth = pcAQLayer.m_AQPartWidth; + const uint32_t uiAQPartHeight = pcAQLayer.m_AQPartHeight; + double* pcAQU = pcAQLayer.m_dActivity; + + double dSumAct = 0.0; + for (uint32_t y = 0; y < iHeight; y += uiAQPartHeight) + { + const uint32_t uiCurrAQPartHeight = min(uiAQPartHeight, iHeight - y); + for (uint32_t x = 0; x < iWidth; x += uiAQPartWidth, pcAQU++) + { + const uint32_t uiCurrAQPartWidth = min(uiAQPartWidth, iWidth - x); + const pixel* pBlkY = &src[x]; + uint64_t uiSum[4] = { 0, 0, 0, 0 }; + uint64_t uiSumSq[4] = { 0, 0, 0, 0 }; + uint32_t by = 0; + for (; by < uiCurrAQPartHeight >> 1; by++) + { + uint32_t bx = 0; + for (; bx < uiCurrAQPartWidth >> 1; bx++) + { + uiSum[0] += pBlkY[bx]; + uiSumSq[0] += pBlkY[bx] * pBlkY[bx]; + } + for (; bx < uiCurrAQPartWidth; bx++) + { + uiSum[1] += pBlkY[bx]; + uiSumSq[1] += pBlkY[bx] * pBlkY[bx]; + } + pBlkY += iStride; + } + for (; by < uiCurrAQPartHeight; by++) + { + uint32_t bx = 0; + for (; bx < uiCurrAQPartWidth >> 1; bx++) + { + uiSum[2] += pBlkY[bx]; + uiSumSq[2] += pBlkY[bx] * pBlkY[bx]; + } + for (; bx < uiCurrAQPartWidth; bx++) + { + uiSum[3] += pBlkY[bx]; + uiSumSq[3] += pBlkY[bx] * pBlkY[bx]; + } + pBlkY += iStride; + } + + assert((uiCurrAQPartWidth & 1) == 0); + assert((uiCurrAQPartHeight & 1) == 0); + const uint32_t pixelWidthOfQuadrants = 
uiCurrAQPartWidth >> 1; + const uint32_t pixelHeightOfQuadrants = uiCurrAQPartHeight >> 1; + const uint32_t numPixInAQPart = pixelWidthOfQuadrants * pixelHeightOfQuadrants; + + double dMinVar = DBL_MAX; + if (numPixInAQPart != 0) + { + for (int i = 0; i < 4; i++) + { + const double dAverage = double(uiSum[i]) / numPixInAQPart; + const double dVariance = double(uiSumSq[i]) / numPixInAQPart - dAverage * dAverage; + dMinVar = min(dMinVar, dVariance); + } + } + else + { + dMinVar = 0.0; + } + double dActivity = 1.0 + dMinVar; + *pcAQU = dActivity; + dSumAct += dActivity; + } + src += iStride * uiCurrAQPartHeight; + } + + const double dAvgAct = dSumAct / (pcAQLayer.m_NumAQPartInWidth * pcAQLayer.m_NumAQPartInHeight); + pcAQLayer.m_dAvgActivity = dAvgAct; + } +} void LookaheadTLD::calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param) { /* Actual adaptive quantization */ @@ -156,14 +242,13 @@ double strength = 0.f; if (param->rc.aqMode == X265_AQ_NONE || param->rc.aqStrength == 0) { - /* Need to init it anyways for CU tree */ - int cuCount = blockCount; - +// /* Need to init it anyways for CU tree */ +// int cuCount = blockCount; if (param->rc.aqMode && param->rc.aqStrength == 0) { if (quantOffsets) { - for (int cuxy = 0; cuxy < cuCount; cuxy++) + for (int cuxy = 0; cuxy < blockCount; cuxy++) { curFrame->m_lowres.qpCuTreeOffset[cuxy] = curFrame->m_lowres.qpAqOffset[cuxy] = quantOffsets[cuxy]; curFrame->m_lowres.invQscaleFactor[cuxy] = x265_exp2fix8(curFrame->m_lowres.qpCuTreeOffset[cuxy]); @@ -171,9 +256,9 @@ } else { - memset(curFrame->m_lowres.qpCuTreeOffset, 0, cuCount * sizeof(double)); - memset(curFrame->m_lowres.qpAqOffset, 0, cuCount * sizeof(double)); - for (int cuxy = 0; cuxy < cuCount; cuxy++) + memset(curFrame->m_lowres.qpCuTreeOffset, 0, blockCount * sizeof(double)); + memset(curFrame->m_lowres.qpAqOffset, 0, blockCount * sizeof(double)); + for (int cuxy = 0; cuxy < blockCount; cuxy++) curFrame->m_lowres.invQscaleFactor[cuxy] = 256; } } @@ -188,35 +273,34 
@@ } else { + if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED) + { + xPreanalyze(curFrame); + } + blockXY = 0; double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0; double bias_strength = 0.f; if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED) { double bit_depth_correction = 1.f / (1 << (2*(X265_DEPTH-8))); - curFrame->m_lowres.frameVariance = 0; - uint64_t rowVariance = 0; + for (blockY = 0; blockY < maxRow; blockY += loopIncr) { - rowVariance = 0; for (blockX = 0; blockX < maxCol; blockX += loopIncr) { uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize); - curFrame->m_lowres.blockVariance[blockXY] = energy; - rowVariance += energy; qp_adj = pow(energy * bit_depth_correction + 1, 0.1); curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj; avg_adj += qp_adj; avg_adj_pow2 += qp_adj * qp_adj; blockXY++; } - curFrame->m_lowres.frameVariance += (rowVariance / maxCol); } - curFrame->m_lowres.frameVariance /= maxRow; avg_adj /= blockCount; avg_adj_pow2 /= blockCount; strength = param->rc.aqStrength * avg_adj; - avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (modeTwoConst)) / avg_adj; + avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - modeTwoConst) / avg_adj; bias_strength = param->rc.aqStrength; } else diff -r b1dfa312234e -r 5bf57563452b source/encoder/slicetype.h --- a/source/encoder/slicetype.h Thu Nov 30 10:06:49 2017 +0530 +++ b/source/encoder/slicetype.h Wed Nov 08 18:09:22 2017 +0530 @@ -82,14 +82,11 @@ } ~LookaheadTLD() { X265_FREE(wbuffer[0]); } - void calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param); void lowresIntraEstimate(Lowres& fenc, uint32_t qgSize); - void weightsAnalyse(Lowres& fenc, Lowres& ref); - + void xPreanalyze(Frame* curFrame); protected: - uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize); uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t 
qgSize); uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp); _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel