# HG changeset patch # User Bhavna Hariharan <bha...@multicorewareinc.com> # Date 1520595579 -19800 # Fri Mar 09 17:09:39 2018 +0530 # Node ID e5425bd33176d6366f34d93e80f9cb1c9c4ebe6f # Parent d292dacb81d8607ce0b2fb106b7383b360863e9d dynamic-refine: enable switching between inter refinement levels 0-3 based on the content and the encoder properties.
The algorithm has 2 parts 1) Training - Encode frames with refine-inter 3 and calculate corresponding feature values until saturation of values. The training restarts when a scenecut is encountered. 2) Classification - Based on the prior probability calculated from the training data and the feature metric of the current CU an appropriate refine-inter level is chosen for the CU. diff -r d292dacb81d8 -r e5425bd33176 source/common/cudata.cpp --- a/source/common/cudata.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/cudata.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -317,6 +317,16 @@ m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL; m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL; memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); + + if (m_encData->m_param->bDynamicRefine) + { + int size = m_encData->m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; + CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size); + CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size); + CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size); + } +fail: + return; } // initialize Sub partition diff -r d292dacb81d8 -r e5425bd33176 source/common/cudata.h --- a/source/common/cudata.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/cudata.h Fri Mar 09 17:09:39 2018 +0530 @@ -224,6 +224,11 @@ uint64_t m_fAc_den[3]; uint64_t m_fDc_den[3]; + /* Feature values per CTU for dynamic refinement */ + uint64_t* m_collectCURd; + uint32_t* m_collectCUVariance; + uint32_t* m_collectCUCount; + CUData(); void initialize(const CUDataMemPool& dataPool, uint32_t depth, const x265_param& param, int instance); diff -r d292dacb81d8 -r e5425bd33176 source/common/frame.cpp --- a/source/common/frame.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/frame.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -53,6 +53,7 @@ m_addOnDepth = NULL; m_addOnCtuInfo = NULL; m_addOnPrevChange = NULL; + 
m_classifyFrame = false; } bool Frame::create(x265_param *param, float* quantOffsets) @@ -85,6 +86,14 @@ m_analysis2Pass.analysisFramedata = NULL; } + if (param->bDynamicRefine) + { + int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; + CHECKED_MALLOC_ZERO(m_classifyRd, uint64_t, size); + CHECKED_MALLOC_ZERO(m_classifyVariance, uint64_t, size); + CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size); + } + if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode || !!param->bAQMotion, param->rc.qgSize)) { X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized"); @@ -226,4 +235,11 @@ } m_lowres.destroy(); X265_FREE(m_rcData); + + if (m_param->bDynamicRefine) + { + X265_FREE_ZERO(m_classifyRd); + X265_FREE_ZERO(m_classifyVariance); + X265_FREE_ZERO(m_classifyCount); + } } diff -r d292dacb81d8 -r e5425bd33176 source/common/frame.h --- a/source/common/frame.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/frame.h Fri Mar 09 17:09:39 2018 +0530 @@ -122,6 +122,14 @@ uint8_t** m_addOnDepth; uint8_t** m_addOnCtuInfo; int** m_addOnPrevChange; + + /* Average feature values of frames being considered for classification */ + uint64_t* m_classifyRd; + uint64_t* m_classifyVariance; + uint32_t* m_classifyCount; + + bool m_classifyFrame; + Frame(); bool create(x265_param *param, float* quantOffsets); diff -r d292dacb81d8 -r e5425bd33176 source/common/lowres.cpp --- a/source/common/lowres.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/lowres.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -59,10 +59,12 @@ CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes); CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes); CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes); - CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes); if (qgSize == 8) CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount); } + + if (origPic->m_param->bDynamicRefine) + 
CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes); CHECKED_MALLOC(propagateCost, uint16_t, cuCount); /* allocate lowres buffers */ diff -r d292dacb81d8 -r e5425bd33176 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/analysis.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -1184,7 +1184,7 @@ if (m_evaluateInter) { - if (m_param->interRefine == 2) + if (m_refineLevel == 2) { if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP) skipModes = true; @@ -1307,7 +1307,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); if (m_param->rdLevel) - skipModes = (m_param->bEnableEarlySkip || m_param->interRefine == 2) + skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2) && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth } if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bMVType && m_param->analysisReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]))) @@ -1874,7 +1874,7 @@ if (m_evaluateInter) { - if (m_param->interRefine == 2) + if (m_refineLevel == 2) { if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP) skipModes = true; @@ -2004,7 +2004,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - skipModes = (m_param->bEnableEarlySkip || m_param->interRefine == 2) && + skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2) && md.bestMode && !md.bestMode->cu.getQtRootCbf(0); refMasks[0] = allSplitRefs; md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); @@ -2413,7 +2413,16 @@ bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth; - int split = (m_param->interRefine && cuGeom.log2CUSize == (uint32_t)(g_log2Size[m_param->minCUSize] + 
1) && bDecidedDepth); + TrainingData td; + td.init(parentCTU, cuGeom); + + if (!m_param->bDynamicRefine) + m_refineLevel = m_param->interRefine; + else + m_refineLevel = m_frame->m_classifyFrame ? 0 : 3; + + int split = (m_refineLevel && cuGeom.log2CUSize == (uint32_t)(g_log2Size[m_param->minCUSize] + 1) && bDecidedDepth); + td.split = split; if (bDecidedDepth) { @@ -2423,7 +2432,7 @@ md.bestMode = &mode; mode.cu.initSubCU(parentCTU, cuGeom, qp); PartSize size = (PartSize)parentCTU.m_partSize[cuGeom.absPartIdx]; - if (parentCTU.isIntra(cuGeom.absPartIdx) && m_param->interRefine < 2) + if (parentCTU.isIntra(cuGeom.absPartIdx) && m_refineLevel < 2) { if (m_param->intraRefine == 4) compressIntraCU(parentCTU, cuGeom, qp); @@ -2439,7 +2448,7 @@ checkIntra(mode, cuGeom, size); } } - else if (!parentCTU.isIntra(cuGeom.absPartIdx) && m_param->interRefine < 2) + else if (!parentCTU.isIntra(cuGeom.absPartIdx) && m_refineLevel < 2) { mode.cu.copyFromPic(parentCTU, cuGeom, m_csp, false); uint32_t numPU = parentCTU.getNumPartInter(cuGeom.absPartIdx); @@ -2501,7 +2510,7 @@ } motionCompensation(mode.cu, pu, mode.predYuv, true, (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)); } - if (!m_param->interRefine && parentCTU.isSkipped(cuGeom.absPartIdx)) + if (!m_param->interRefine && !m_param->bDynamicRefine && parentCTU.isSkipped(cuGeom.absPartIdx)) encodeResAndCalcRdSkipCU(mode); else encodeResAndCalcRdInterCU(mode, cuGeom); @@ -2512,7 +2521,7 @@ checkDQP(mode, cuGeom); } - if (m_param->interRefine < 2) + if (m_refineLevel < 2) { if (m_bTryLossless) tryLossless(cuGeom); @@ -2540,7 +2549,10 @@ } } - if (m_param->interRefine > 1 || (m_param->interRefine && parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP && !mode.cu.isSkipped(0))) + if (m_param->bDynamicRefine) + classifyCU(parentCTU,cuGeom, *md.bestMode, td); + + if (m_refineLevel > 1 || (m_refineLevel && parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP && !mode.cu.isSkipped(0))) { m_evaluateInter = 
1; m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU, cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp); @@ -2599,7 +2611,7 @@ else updateModeCost(*splitPred); - if (m_param->interRefine) + if (m_refineLevel) { if (m_param->rdLevel > 1) checkBestMode(*splitPred, cuGeom.depth); @@ -2613,6 +2625,83 @@ md.bestMode->cu.copyToPic(depth); md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx); } + if (m_param->bDynamicRefine && bDecidedDepth) + trainCU(parentCTU, cuGeom, *md.bestMode, td); +} + +void Analysis::classifyCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& bestMode, TrainingData& trainData) +{ + uint32_t depth = cuGeom.depth; + trainData.cuVariance = calculateCUVariance(ctu, cuGeom); + if (m_frame->m_classifyFrame) + { + uint64_t diffRefine[X265_REFINE_INTER_LEVELS]; + uint64_t diffRefineRd[X265_REFINE_INTER_LEVELS]; + float probRefine[X265_REFINE_INTER_LEVELS] = { 0 }; + uint8_t varRefineLevel = 0; + uint8_t rdRefineLevel = 0; + uint64_t cuCost = bestMode.rdCost; + + int offset = (depth * X265_REFINE_INTER_LEVELS) + 1; + if (cuCost < m_frame->m_classifyRd[offset]) + m_refineLevel = 1; + else + { + uint64_t trainingCount = 0; + for (uint8_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) + { + offset = (depth * X265_REFINE_INTER_LEVELS) + i; + trainingCount += m_frame->m_classifyCount[offset]; + } + for (uint8_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) + { + offset = (depth * X265_REFINE_INTER_LEVELS) + i; + /* Calculate distance values */ + diffRefine[i] = abs((int64_t)(trainData.cuVariance - m_frame->m_classifyVariance[offset])); + diffRefineRd[i] = abs((int64_t)(cuCost - m_frame->m_classifyRd[offset])); + + /* Calculate prior probability - ranges between 0 and 1 */ + if (trainingCount) + probRefine[i] = ((float)m_frame->m_classifyCount[offset] / (float)trainingCount); + + /* Bayesian classification - P(c|x)P(x) = P(x|c)P(c) + P(c|x) is the posterior probability of class given predictor. 
+ P(c) is the prior probability of class. + P(x|c) is the likelihood which is the probability of predictor given class. + P(x) is the prior probability of predictor.*/ + if ((diffRefine[i] * probRefine[m_refineLevel]) < (diffRefine[m_refineLevel] * probRefine[i])) + varRefineLevel = i; + if ((diffRefineRd[i] * probRefine[m_refineLevel]) < (diffRefineRd[m_refineLevel] * probRefine[i])) + rdRefineLevel = i; + } + m_refineLevel = X265_MAX(varRefineLevel, rdRefineLevel); + } + } +} + +void Analysis::trainCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& bestMode, TrainingData& trainData) +{ + uint32_t depth = cuGeom.depth; + int classify = 0; + if (!m_frame->m_classifyFrame) + { + if (trainData.predMode == ctu.m_predMode[cuGeom.absPartIdx] && trainData.partSize == ctu.m_partSize[cuGeom.absPartIdx] + && trainData.mergeFlag == ctu.m_mergeFlag[cuGeom.absPartIdx]) + classify = 0; + else if ((depth == m_param->maxCUDepth - 1) && trainData.split) + classify = 1; + else if (trainData.partSize == SIZE_2Nx2N && trainData.partSize == ctu.m_partSize[cuGeom.absPartIdx]) + classify = 2; + else + classify = 3; + } + else + classify = m_refineLevel; + uint64_t cuCost = bestMode.rdCost; + int offset = (depth * X265_REFINE_INTER_LEVELS) + classify; + ctu.m_collectCURd[offset] += cuCost; + ctu.m_collectCUVariance[offset] += trainData.cuVariance; + ctu.m_collectCUCount[offset]++; } /* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */ @@ -3414,6 +3503,33 @@ return false; } +uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom) +{ + uint32_t cuVariance = 0; + uint32_t *blockVariance = m_frame->m_lowres.blockVariance; + int loopIncr = (m_param->rc.qgSize == 8) ? 
8 : 16; + + uint32_t width = m_frame->m_fencPic->m_picWidth; + uint32_t height = m_frame->m_fencPic->m_picHeight; + uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; + uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; + uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr; + uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth; + uint32_t cnt = 0; + + for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && block_yy < height; block_yy += loopIncr) + { + for (uint32_t block_xx = block_x; block_xx < block_x + blockSize && block_xx < width; block_xx += loopIncr) + { + uint32_t idx = ((block_yy / loopIncr) * (maxCols)) + (block_xx / loopIncr); + cuVariance += blockVariance[idx]; + cnt++; + } + } + + return cuVariance / cnt; +} + int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom, int32_t complexCheck, double baseQp) { FrameData& curEncData = *m_frame->m_encData; diff -r d292dacb81d8 -r e5425bd33176 source/encoder/analysis.h --- a/source/encoder/analysis.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/analysis.h Fri Mar 09 17:09:39 2018 +0530 @@ -142,8 +142,29 @@ uint8_t* m_multipassModes; uint8_t m_evaluateInter; + int32_t m_refineLevel; + uint8_t* m_additionalCtuInfo; int* m_prevCtuInfoChange; + + struct TrainingData + { + uint32_t cuVariance; + uint8_t predMode; + uint8_t partSize; + uint8_t mergeFlag; + int split; + + void init(const CUData& parentCTU, const CUGeom& cuGeom) + { + cuVariance = 0; + predMode = parentCTU.m_predMode[cuGeom.absPartIdx]; + partSize = parentCTU.m_partSize[cuGeom.absPartIdx]; + mergeFlag = parentCTU.m_mergeFlag[cuGeom.absPartIdx]; + split = 0; + } + }; + /* refine RD based on QP for rd-levels 5 and 6 */ void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, int32_t lqp); @@ -182,6 +203,10 @@ void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom); int calculateQpforCuSize(const CUData& ctu, const CUGeom& 
cuGeom, int32_t complexCheck = 0, double baseQP = -1); + uint32_t calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom); + + void classifyCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& bestMode, TrainingData& trainData); + void trainCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& bestMode, TrainingData& trainData); void calculateNormFactor(CUData& ctu, int qp); void normFactor(const pixel* src, uint32_t blockSize, CUData& ctu, int qp, TextType ttype); diff -r d292dacb81d8 -r e5425bd33176 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/encoder.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -96,6 +96,7 @@ #endif m_prevTonemapPayload.payload = NULL; + m_startPoint = 0; } inline char *strcatFilename(const char *input, const char *suffix) { @@ -412,6 +413,17 @@ if (m_bToneMap) m_numCimInfo = m_hdr10plus_api->hdr10plus_json_to_movie_cim(m_param->toneMapFile, m_cim); #endif + + if (m_param->bDynamicRefine) + { + int size = m_param->totalFrames * m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; + CHECKED_MALLOC_ZERO(m_variance, uint64_t, size); + CHECKED_MALLOC_ZERO(m_rdCost, uint64_t, size); + CHECKED_MALLOC_ZERO(m_trainingCount, uint32_t, size); + return; + fail: + m_aborted = true; + } } void Encoder::stopJobs() @@ -697,7 +709,13 @@ if (m_bToneMap) m_hdr10plus_api->hdr10plus_clear_movie(m_cim, m_numCimInfo); #endif - + + if (m_param->bDynamicRefine) + { + X265_FREE(m_variance); + X265_FREE(m_rdCost); + X265_FREE(m_trainingCount); + } if (m_exportedPic) { ATOMIC_DEC(&m_exportedPic->m_countRefEncoders); diff -r d292dacb81d8 -r e5425bd33176 source/encoder/encoder.h --- a/source/encoder/encoder.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/encoder.h Fri Mar 09 17:09:39 2018 +0530 @@ -221,6 +221,13 @@ x265_sei_payload m_prevTonemapPayload; + /* Collect frame level feature data */ + uint64_t* m_rdCost; + uint64_t* m_variance; + uint32_t* m_trainingCount; + int32_t m_startPoint; + Lock 
m_dynamicRefineLock; + Encoder(); ~Encoder() { diff -r d292dacb81d8 -r e5425bd33176 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/frameencoder.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -736,6 +736,9 @@ m_top->m_rateControl->m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames } + if (m_param->bDynamicRefine) + computeAvgTrainingData(); + /* Analyze CTU rows, most of the hard work is done here. Frame is * compressed in a wave-front pattern if WPP is enabled. Row based loop * filters runs behind the CTU compression and reconstruction */ @@ -1457,6 +1460,30 @@ // Does all the CU analysis, returns best top level mode decision Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder); + if (m_param->bDynamicRefine) + { + { + ScopedLock dynLock(m_top->m_dynamicRefineLock); + for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) + { + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++) + { + int offset = (depth * X265_REFINE_INTER_LEVELS) + i; + int index = (m_frame->m_encodeOrder * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset; + if (ctu->m_collectCUCount[offset]) + { + m_top->m_variance[index] += ctu->m_collectCUVariance[offset]; + m_top->m_rdCost[index] += ctu->m_collectCURd[offset]; + m_top->m_trainingCount[index] += ctu->m_collectCUCount[offset]; + } + } + } + } + X265_FREE_ZERO(ctu->m_collectCUVariance); + X265_FREE_ZERO(ctu->m_collectCURd); + X265_FREE_ZERO(ctu->m_collectCUCount); + } + // take a sample of the current active worker count ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount); ATOMIC_INC(&m_activeWorkerCountSamples); @@ -1839,6 +1866,58 @@ m_completionEvent.trigger(); } +void FrameEncoder::computeAvgTrainingData() +{ + if (m_frame->m_lowres.bScenecut) + m_top->m_startPoint = m_frame->m_encodeOrder; + + if (m_frame->m_encodeOrder - m_top->m_startPoint < 2 * m_param->frameNumThreads) + 
m_frame->m_classifyFrame = false; + else + m_frame->m_classifyFrame = true; + + int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; + memset(m_frame->m_classifyRd, 0, size * sizeof(uint64_t)); + memset(m_frame->m_classifyVariance, 0, size * sizeof(uint64_t)); + memset(m_frame->m_classifyCount, 0, size * sizeof(uint32_t)); + + if (m_frame->m_classifyFrame) + { + uint32_t limit = m_frame->m_encodeOrder - m_param->frameNumThreads - 1; + for (uint32_t i = m_top->m_startPoint + 1; i < limit; i++) + { + for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++) + { + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++) + { + int offset = (depth * X265_REFINE_INTER_LEVELS) + j; + int index = (i* X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset; + if (m_top->m_trainingCount[index]) + { + m_frame->m_classifyRd[offset] += m_top->m_rdCost[index] / m_top->m_trainingCount[index]; + m_frame->m_classifyVariance[offset] += m_top->m_variance[index] / m_top->m_trainingCount[index]; + m_frame->m_classifyCount[offset] += m_top->m_trainingCount[index]; + } + } + } + } + /* Calculates the average feature values of historic frames that are being considered for the current frame */ + int historyCount = m_frame->m_encodeOrder - m_param->frameNumThreads - m_top->m_startPoint - 1; + if (historyCount) + { + for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++) + { + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++) + { + int offset = (depth * X265_REFINE_INTER_LEVELS) + j; + m_frame->m_classifyRd[offset] /= historyCount; + m_frame->m_classifyVariance[offset] /= historyCount; + } + } + } + } +} + /* collect statistics about CU coding decisions, return total QP */ int FrameEncoder::collectCTUStatistics(const CUData& ctu, FrameStats* log) { diff -r d292dacb81d8 -r e5425bd33176 source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/frameencoder.h Fri Mar 09 17:09:39 2018 +0530 @@ -230,6 +230,7 @@ 
void threadMain(); int collectCTUStatistics(const CUData& ctu, FrameStats* frameLog); void noiseReductionUpdate(); + void computeAvgTrainingData(); /* Called by WaveFront::findJob() */ virtual void processRow(int row, int threadId); diff -r d292dacb81d8 -r e5425bd33176 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/slicetype.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -178,12 +178,12 @@ } } - /* Need variance data for weighted prediction */ + /* Need variance data for weighted prediction and dynamic refinement*/ if (param->bEnableWeightedPred || param->bEnableWeightedBiPred) - { + { for (blockY = 0; blockY < maxRow; blockY += loopIncr) - for (blockX = 0; blockX < maxCol; blockX += loopIncr) - acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize); + for (blockX = 0; blockX < maxCol; blockX += loopIncr) + acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize); } } else @@ -240,7 +240,7 @@ else { uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp,param->rc.qgSize); - qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (modeOneConst + 2 * (X265_DEPTH - 8))); + qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (modeOneConst + 2 * (X265_DEPTH - 8))); } if (param->bHDROpt) @@ -308,6 +308,17 @@ curFrame->m_lowres.wp_ssd[i] = ssd - (sum * sum + (width[i] * height[i]) / 2) / (width[i] * height[i]); } } + + if (param->bDynamicRefine) + { + blockXY = 0; + for (blockY = 0; blockY < maxRow; blockY += loopIncr) + for (blockX = 0; blockX < maxCol; blockX += loopIncr) + { + curFrame->m_lowres.blockVariance[blockXY] = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize); + blockXY++; + } + } } void LookaheadTLD::lowresIntraEstimate(Lowres& fenc, uint32_t qgSize)
# HG changeset patch # User Bhavna Hariharan <bha...@multicorewareinc.com> # Date 1520595579 -19800 # Fri Mar 09 17:09:39 2018 +0530 # Node ID e5425bd33176d6366f34d93e80f9cb1c9c4ebe6f # Parent d292dacb81d8607ce0b2fb106b7383b360863e9d dynamic-refine: enable switching between inter refinement levels 0-3 based on the content and the encoder properties. The algorithm has 2 parts 1) Training - Encode frames with refine-inter 3 and calculate corresponding feature values until saturation of values. The training restarts when a scenecut is encountered. 2) Classification - Based on the prior probability calculated from the training data and the feature metric of the current CU an appropriate refine-inter level is chosen for the CU. diff -r d292dacb81d8 -r e5425bd33176 source/common/cudata.cpp --- a/source/common/cudata.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/cudata.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -317,6 +317,16 @@ m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL; m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? 
m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL; memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); + + if (m_encData->m_param->bDynamicRefine) + { + int size = m_encData->m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; + CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size); + CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size); + CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size); + } +fail: + return; } // initialize Sub partition diff -r d292dacb81d8 -r e5425bd33176 source/common/cudata.h --- a/source/common/cudata.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/cudata.h Fri Mar 09 17:09:39 2018 +0530 @@ -224,6 +224,11 @@ uint64_t m_fAc_den[3]; uint64_t m_fDc_den[3]; + /* Feature values per CTU for dynamic refinement */ + uint64_t* m_collectCURd; + uint32_t* m_collectCUVariance; + uint32_t* m_collectCUCount; + CUData(); void initialize(const CUDataMemPool& dataPool, uint32_t depth, const x265_param& param, int instance); diff -r d292dacb81d8 -r e5425bd33176 source/common/frame.cpp --- a/source/common/frame.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/frame.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -53,6 +53,7 @@ m_addOnDepth = NULL; m_addOnCtuInfo = NULL; m_addOnPrevChange = NULL; + m_classifyFrame = false; } bool Frame::create(x265_param *param, float* quantOffsets) @@ -85,6 +86,14 @@ m_analysis2Pass.analysisFramedata = NULL; } + if (param->bDynamicRefine) + { + int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; + CHECKED_MALLOC_ZERO(m_classifyRd, uint64_t, size); + CHECKED_MALLOC_ZERO(m_classifyVariance, uint64_t, size); + CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size); + } + if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode || !!param->bAQMotion, param->rc.qgSize)) { X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized"); @@ -226,4 +235,11 @@ } m_lowres.destroy(); X265_FREE(m_rcData); + + if (m_param->bDynamicRefine) + { + 
X265_FREE_ZERO(m_classifyRd); + X265_FREE_ZERO(m_classifyVariance); + X265_FREE_ZERO(m_classifyCount); + } } diff -r d292dacb81d8 -r e5425bd33176 source/common/frame.h --- a/source/common/frame.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/frame.h Fri Mar 09 17:09:39 2018 +0530 @@ -122,6 +122,14 @@ uint8_t** m_addOnDepth; uint8_t** m_addOnCtuInfo; int** m_addOnPrevChange; + + /* Average feature values of frames being considered for classification */ + uint64_t* m_classifyRd; + uint64_t* m_classifyVariance; + uint32_t* m_classifyCount; + + bool m_classifyFrame; + Frame(); bool create(x265_param *param, float* quantOffsets); diff -r d292dacb81d8 -r e5425bd33176 source/common/lowres.cpp --- a/source/common/lowres.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/common/lowres.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -59,10 +59,12 @@ CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes); CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes); CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes); - CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes); if (qgSize == 8) CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount); } + + if (origPic->m_param->bDynamicRefine) + CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes); CHECKED_MALLOC(propagateCost, uint16_t, cuCount); /* allocate lowres buffers */ diff -r d292dacb81d8 -r e5425bd33176 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/analysis.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -1184,7 +1184,7 @@ if (m_evaluateInter) { - if (m_param->interRefine == 2) + if (m_refineLevel == 2) { if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP) skipModes = true; @@ -1307,7 +1307,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); if (m_param->rdLevel) - skipModes = (m_param->bEnableEarlySkip || m_param->interRefine == 2) + skipModes = 
(m_param->bEnableEarlySkip || m_refineLevel == 2) && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth } if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bMVType && m_param->analysisReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]))) @@ -1874,7 +1874,7 @@ if (m_evaluateInter) { - if (m_param->interRefine == 2) + if (m_refineLevel == 2) { if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP) skipModes = true; @@ -2004,7 +2004,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - skipModes = (m_param->bEnableEarlySkip || m_param->interRefine == 2) && + skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2) && md.bestMode && !md.bestMode->cu.getQtRootCbf(0); refMasks[0] = allSplitRefs; md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); @@ -2413,7 +2413,16 @@ bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth; - int split = (m_param->interRefine && cuGeom.log2CUSize == (uint32_t)(g_log2Size[m_param->minCUSize] + 1) && bDecidedDepth); + TrainingData td; + td.init(parentCTU, cuGeom); + + if (!m_param->bDynamicRefine) + m_refineLevel = m_param->interRefine; + else + m_refineLevel = m_frame->m_classifyFrame ? 
0 : 3; + + int split = (m_refineLevel && cuGeom.log2CUSize == (uint32_t)(g_log2Size[m_param->minCUSize] + 1) && bDecidedDepth); + td.split = split; if (bDecidedDepth) { @@ -2423,7 +2432,7 @@ md.bestMode = &mode; mode.cu.initSubCU(parentCTU, cuGeom, qp); PartSize size = (PartSize)parentCTU.m_partSize[cuGeom.absPartIdx]; - if (parentCTU.isIntra(cuGeom.absPartIdx) && m_param->interRefine < 2) + if (parentCTU.isIntra(cuGeom.absPartIdx) && m_refineLevel < 2) { if (m_param->intraRefine == 4) compressIntraCU(parentCTU, cuGeom, qp); @@ -2439,7 +2448,7 @@ checkIntra(mode, cuGeom, size); } } - else if (!parentCTU.isIntra(cuGeom.absPartIdx) && m_param->interRefine < 2) + else if (!parentCTU.isIntra(cuGeom.absPartIdx) && m_refineLevel < 2) { mode.cu.copyFromPic(parentCTU, cuGeom, m_csp, false); uint32_t numPU = parentCTU.getNumPartInter(cuGeom.absPartIdx); @@ -2501,7 +2510,7 @@ } motionCompensation(mode.cu, pu, mode.predYuv, true, (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)); } - if (!m_param->interRefine && parentCTU.isSkipped(cuGeom.absPartIdx)) + if (!m_param->interRefine && !m_param->bDynamicRefine && parentCTU.isSkipped(cuGeom.absPartIdx)) encodeResAndCalcRdSkipCU(mode); else encodeResAndCalcRdInterCU(mode, cuGeom); @@ -2512,7 +2521,7 @@ checkDQP(mode, cuGeom); } - if (m_param->interRefine < 2) + if (m_refineLevel < 2) { if (m_bTryLossless) tryLossless(cuGeom); @@ -2540,7 +2549,10 @@ } } - if (m_param->interRefine > 1 || (m_param->interRefine && parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP && !mode.cu.isSkipped(0))) + if (m_param->bDynamicRefine) + classifyCU(parentCTU,cuGeom, *md.bestMode, td); + + if (m_refineLevel > 1 || (m_refineLevel && parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP && !mode.cu.isSkipped(0))) { m_evaluateInter = 1; m_param->rdLevel > 4 ? 
compressInterCU_rd5_6(parentCTU, cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp); @@ -2599,7 +2611,7 @@ else updateModeCost(*splitPred); - if (m_param->interRefine) + if (m_refineLevel) { if (m_param->rdLevel > 1) checkBestMode(*splitPred, cuGeom.depth); @@ -2613,6 +2625,83 @@ md.bestMode->cu.copyToPic(depth); md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx); } + if (m_param->bDynamicRefine && bDecidedDepth) + trainCU(parentCTU, cuGeom, *md.bestMode, td); +} + +void Analysis::classifyCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& bestMode, TrainingData& trainData) +{ + uint32_t depth = cuGeom.depth; + trainData.cuVariance = calculateCUVariance(ctu, cuGeom); + if (m_frame->m_classifyFrame) + { + uint64_t diffRefine[X265_REFINE_INTER_LEVELS]; + uint64_t diffRefineRd[X265_REFINE_INTER_LEVELS]; + float probRefine[X265_REFINE_INTER_LEVELS] = { 0 }; + uint8_t varRefineLevel = 0; + uint8_t rdRefineLevel = 0; + uint64_t cuCost = bestMode.rdCost; + + int offset = (depth * X265_REFINE_INTER_LEVELS) + 1; + if (cuCost < m_frame->m_classifyRd[offset]) + m_refineLevel = 1; + else + { + uint64_t trainingCount = 0; + for (uint8_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) + { + offset = (depth * X265_REFINE_INTER_LEVELS) + i; + trainingCount += m_frame->m_classifyCount[offset]; + } + for (uint8_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) + { + offset = (depth * X265_REFINE_INTER_LEVELS) + i; + /* Calculate distance values */ + diffRefine[i] = abs((int64_t)(trainData.cuVariance - m_frame->m_classifyVariance[offset])); + diffRefineRd[i] = abs((int64_t)(cuCost - m_frame->m_classifyRd[offset])); + + /* Calculate prior probability - ranges between 0 and 1 */ + if (trainingCount) + probRefine[i] = ((float)m_frame->m_classifyCount[offset] / (float)trainingCount); + + /* Bayesian classification - P(c|x)P(x) = P(x|c)P(c) + P(c|x) is the posterior probability of class given predictor. + P(c) is the prior probability of class. 
+ P(x|c) is the likelihood which is the probability of predictor given class. + P(x) is the prior probability of predictor.*/ + if ((diffRefine[i] * probRefine[m_refineLevel]) < (diffRefine[m_refineLevel] * probRefine[i])) + varRefineLevel = i; + if ((diffRefineRd[i] * probRefine[m_refineLevel]) < (diffRefineRd[m_refineLevel] * probRefine[i])) + rdRefineLevel = i; + } + m_refineLevel = X265_MAX(varRefineLevel, rdRefineLevel); + } + } +} + +void Analysis::trainCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& bestMode, TrainingData& trainData) +{ + uint32_t depth = cuGeom.depth; + int classify = 0; + if (!m_frame->m_classifyFrame) + { + if (trainData.predMode == ctu.m_predMode[cuGeom.absPartIdx] && trainData.partSize == ctu.m_partSize[cuGeom.absPartIdx] + && trainData.mergeFlag == ctu.m_mergeFlag[cuGeom.absPartIdx]) + classify = 0; + else if ((depth == m_param->maxCUDepth - 1) && trainData.split) + classify = 1; + else if (trainData.partSize == SIZE_2Nx2N && trainData.partSize == ctu.m_partSize[cuGeom.absPartIdx]) + classify = 2; + else + classify = 3; + } + else + classify = m_refineLevel; + uint64_t cuCost = bestMode.rdCost; + int offset = (depth * X265_REFINE_INTER_LEVELS) + classify; + ctu.m_collectCURd[offset] += cuCost; + ctu.m_collectCUVariance[offset] += trainData.cuVariance; + ctu.m_collectCUCount[offset]++; } /* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */ @@ -3414,6 +3503,33 @@ return false; } +uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom) +{ + uint32_t cuVariance = 0; + uint32_t *blockVariance = m_frame->m_lowres.blockVariance; + int loopIncr = (m_param->rc.qgSize == 8) ? 
8 : 16; + + uint32_t width = m_frame->m_fencPic->m_picWidth; + uint32_t height = m_frame->m_fencPic->m_picHeight; + uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; + uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; + uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr; + uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth; + uint32_t cnt = 0; + + for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && block_yy < height; block_yy += loopIncr) + { + for (uint32_t block_xx = block_x; block_xx < block_x + blockSize && block_xx < width; block_xx += loopIncr) + { + uint32_t idx = ((block_yy / loopIncr) * (maxCols)) + (block_xx / loopIncr); + cuVariance += blockVariance[idx]; + cnt++; + } + } + + return cuVariance / cnt; +} + int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom, int32_t complexCheck, double baseQp) { FrameData& curEncData = *m_frame->m_encData; diff -r d292dacb81d8 -r e5425bd33176 source/encoder/analysis.h --- a/source/encoder/analysis.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/analysis.h Fri Mar 09 17:09:39 2018 +0530 @@ -142,8 +142,29 @@ uint8_t* m_multipassModes; uint8_t m_evaluateInter; + int32_t m_refineLevel; + uint8_t* m_additionalCtuInfo; int* m_prevCtuInfoChange; + + struct TrainingData + { + uint32_t cuVariance; + uint8_t predMode; + uint8_t partSize; + uint8_t mergeFlag; + int split; + + void init(const CUData& parentCTU, const CUGeom& cuGeom) + { + cuVariance = 0; + predMode = parentCTU.m_predMode[cuGeom.absPartIdx]; + partSize = parentCTU.m_partSize[cuGeom.absPartIdx]; + mergeFlag = parentCTU.m_mergeFlag[cuGeom.absPartIdx]; + split = 0; + } + }; + /* refine RD based on QP for rd-levels 5 and 6 */ void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, int32_t lqp); @@ -182,6 +203,10 @@ void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom); int calculateQpforCuSize(const CUData& ctu, const CUGeom& 
cuGeom, int32_t complexCheck = 0, double baseQP = -1); + uint32_t calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom); + + void classifyCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& bestMode, TrainingData& trainData); + void trainCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& bestMode, TrainingData& trainData); void calculateNormFactor(CUData& ctu, int qp); void normFactor(const pixel* src, uint32_t blockSize, CUData& ctu, int qp, TextType ttype); diff -r d292dacb81d8 -r e5425bd33176 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/encoder.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -96,6 +96,7 @@ #endif m_prevTonemapPayload.payload = NULL; + m_startPoint = 0; } inline char *strcatFilename(const char *input, const char *suffix) { @@ -412,6 +413,17 @@ if (m_bToneMap) m_numCimInfo = m_hdr10plus_api->hdr10plus_json_to_movie_cim(m_param->toneMapFile, m_cim); #endif + + if (m_param->bDynamicRefine) + { + int size = m_param->totalFrames * m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; + CHECKED_MALLOC_ZERO(m_variance, uint64_t, size); + CHECKED_MALLOC_ZERO(m_rdCost, uint64_t, size); + CHECKED_MALLOC_ZERO(m_trainingCount, uint32_t, size); + return; + fail: + m_aborted = true; + } } void Encoder::stopJobs() @@ -697,7 +709,13 @@ if (m_bToneMap) m_hdr10plus_api->hdr10plus_clear_movie(m_cim, m_numCimInfo); #endif - + + if (m_param->bDynamicRefine) + { + X265_FREE(m_variance); + X265_FREE(m_rdCost); + X265_FREE(m_trainingCount); + } if (m_exportedPic) { ATOMIC_DEC(&m_exportedPic->m_countRefEncoders); diff -r d292dacb81d8 -r e5425bd33176 source/encoder/encoder.h --- a/source/encoder/encoder.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/encoder.h Fri Mar 09 17:09:39 2018 +0530 @@ -221,6 +221,13 @@ x265_sei_payload m_prevTonemapPayload; + /* Collect frame level feature data */ + uint64_t* m_rdCost; + uint64_t* m_variance; + uint32_t* m_trainingCount; + int32_t m_startPoint; + Lock 
m_dynamicRefineLock; + Encoder(); ~Encoder() { diff -r d292dacb81d8 -r e5425bd33176 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/frameencoder.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -736,6 +736,9 @@ m_top->m_rateControl->m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames } + if (m_param->bDynamicRefine) + computeAvgTrainingData(); + /* Analyze CTU rows, most of the hard work is done here. Frame is * compressed in a wave-front pattern if WPP is enabled. Row based loop * filters runs behind the CTU compression and reconstruction */ @@ -1457,6 +1460,30 @@ // Does all the CU analysis, returns best top level mode decision Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder); + if (m_param->bDynamicRefine) + { + { + ScopedLock dynLock(m_top->m_dynamicRefineLock); + for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) + { + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++) + { + int offset = (depth * X265_REFINE_INTER_LEVELS) + i; + int index = (m_frame->m_encodeOrder * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset; + if (ctu->m_collectCUCount[offset]) + { + m_top->m_variance[index] += ctu->m_collectCUVariance[offset]; + m_top->m_rdCost[index] += ctu->m_collectCURd[offset]; + m_top->m_trainingCount[index] += ctu->m_collectCUCount[offset]; + } + } + } + } + X265_FREE_ZERO(ctu->m_collectCUVariance); + X265_FREE_ZERO(ctu->m_collectCURd); + X265_FREE_ZERO(ctu->m_collectCUCount); + } + // take a sample of the current active worker count ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount); ATOMIC_INC(&m_activeWorkerCountSamples); @@ -1839,6 +1866,58 @@ m_completionEvent.trigger(); } +void FrameEncoder::computeAvgTrainingData() +{ + if (m_frame->m_lowres.bScenecut) + m_top->m_startPoint = m_frame->m_encodeOrder; + + if (m_frame->m_encodeOrder - m_top->m_startPoint < 2 * m_param->frameNumThreads) + 
m_frame->m_classifyFrame = false; + else + m_frame->m_classifyFrame = true; + + int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; + memset(m_frame->m_classifyRd, 0, size * sizeof(uint64_t)); + memset(m_frame->m_classifyVariance, 0, size * sizeof(uint64_t)); + memset(m_frame->m_classifyCount, 0, size * sizeof(uint32_t)); + + if (m_frame->m_classifyFrame) + { + uint32_t limit = m_frame->m_encodeOrder - m_param->frameNumThreads - 1; + for (uint32_t i = m_top->m_startPoint + 1; i < limit; i++) + { + for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++) + { + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++) + { + int offset = (depth * X265_REFINE_INTER_LEVELS) + j; + int index = (i* X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset; + if (m_top->m_trainingCount[index]) + { + m_frame->m_classifyRd[offset] += m_top->m_rdCost[index] / m_top->m_trainingCount[index]; + m_frame->m_classifyVariance[offset] += m_top->m_variance[index] / m_top->m_trainingCount[index]; + m_frame->m_classifyCount[offset] += m_top->m_trainingCount[index]; + } + } + } + } + /* Calculates the average feature values of historic frames that are being considered for the current frame */ + int historyCount = m_frame->m_encodeOrder - m_param->frameNumThreads - m_top->m_startPoint - 1; + if (historyCount) + { + for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++) + { + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++) + { + int offset = (depth * X265_REFINE_INTER_LEVELS) + j; + m_frame->m_classifyRd[offset] /= historyCount; + m_frame->m_classifyVariance[offset] /= historyCount; + } + } + } + } +} + /* collect statistics about CU coding decisions, return total QP */ int FrameEncoder::collectCTUStatistics(const CUData& ctu, FrameStats* log) { diff -r d292dacb81d8 -r e5425bd33176 source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/frameencoder.h Fri Mar 09 17:09:39 2018 +0530 @@ -230,6 +230,7 @@ 
void threadMain(); int collectCTUStatistics(const CUData& ctu, FrameStats* frameLog); void noiseReductionUpdate(); + void computeAvgTrainingData(); /* Called by WaveFront::findJob() */ virtual void processRow(int row, int threadId); diff -r d292dacb81d8 -r e5425bd33176 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Fri Mar 09 09:44:53 2018 +0530 +++ b/source/encoder/slicetype.cpp Fri Mar 09 17:09:39 2018 +0530 @@ -178,12 +178,12 @@ } } - /* Need variance data for weighted prediction */ + /* Need variance data for weighted prediction and dynamic refinement*/ if (param->bEnableWeightedPred || param->bEnableWeightedBiPred) - { + { for (blockY = 0; blockY < maxRow; blockY += loopIncr) - for (blockX = 0; blockX < maxCol; blockX += loopIncr) - acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize); + for (blockX = 0; blockX < maxCol; blockX += loopIncr) + acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize); } } else @@ -240,7 +240,7 @@ else { uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp,param->rc.qgSize); - qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (modeOneConst + 2 * (X265_DEPTH - 8))); + qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (modeOneConst + 2 * (X265_DEPTH - 8))); } if (param->bHDROpt) @@ -308,6 +308,17 @@ curFrame->m_lowres.wp_ssd[i] = ssd - (sum * sum + (width[i] * height[i]) / 2) / (width[i] * height[i]); } } + + if (param->bDynamicRefine) + { + blockXY = 0; + for (blockY = 0; blockY < maxRow; blockY += loopIncr) + for (blockX = 0; blockX < maxCol; blockX += loopIncr) + { + curFrame->m_lowres.blockVariance[blockXY] = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize); + blockXY++; + } + } } void LookaheadTLD::lowresIntraEstimate(Lowres& fenc, uint32_t qgSize)
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel