# HG changeset patch # User Bhavna Hariharan <bha...@multicorewareinc.com> # Date 1479450968 -19800 # Fri Nov 18 12:06:08 2016 +0530 # Node ID 40a0a322b26fc0516a72d4de9a941e18b5bb97b9 # Parent c5295126f248411481a8361acfd2bc8b0636cedc limitTU : use spatial and temporal CUs' TU depth to limit recursion
diff -r c5295126f248 -r 40a0a322b26f doc/reST/cli.rst --- a/doc/reST/cli.rst Fri Nov 18 11:49:05 2016 +0530 +++ b/doc/reST/cli.rst Fri Nov 18 12:06:08 2016 +0530 @@ -869,13 +869,15 @@ partitions, in which case a TU split is implied and thus the residual quad-tree begins one layer below the CU quad-tree. -.. option:: --limit-tu <0|1|2> +.. option:: --limit-tu <0..3> Enables early exit from TU depth recursion, for inter coded blocks. Level 1 - decides to recurse to next higher depth based on cost comparison of full size TU and split TU. Level 2 - based on first split subTU's depth, limits recursion of other split subTUs. + Level 3 - based on the average TU depth of the co-located and neighbor + CUs, limits recursion of the current CU. Default: 0 diff -r c5295126f248 -r 40a0a322b26f source/common/cudata.cpp --- a/source/common/cudata.cpp Fri Nov 18 11:49:05 2016 +0530 +++ b/source/common/cudata.cpp Fri Nov 18 12:06:08 2016 +0530 @@ -296,6 +296,9 @@ /* initialize the remaining CU data in one memset */ memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions); + for (int8_t i = 0; i < NUM_TU_DEPTH; i++) + m_refTuDepth[i] = -1; + uint32_t widthInCU = m_slice->m_sps->numCuInWidth; m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL; m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? 
m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL; diff -r c5295126f248 -r 40a0a322b26f source/common/cudata.h --- a/source/common/cudata.h Fri Nov 18 11:49:05 2016 +0530 +++ b/source/common/cudata.h Fri Nov 18 12:06:08 2016 +0530 @@ -28,6 +28,8 @@ #include "slice.h" #include "mv.h" +#define NUM_TU_DEPTH 21 + namespace X265_NS { // private namespace @@ -204,6 +206,7 @@ enum { BytesPerPartition = 21 }; // combined sizeof() of all per-part data coeff_t* m_trCoeff[3]; // transformed coefficient buffer per plane + int8_t m_refTuDepth[NUM_TU_DEPTH]; // TU depth of CU at depths 0, 1 and 2 MV* m_mv[2]; // array of motion vectors per list MV* m_mvd[2]; // array of coded motion vector deltas per list diff -r c5295126f248 -r 40a0a322b26f source/common/param.cpp --- a/source/common/param.cpp Fri Nov 18 11:49:05 2016 +0530 +++ b/source/common/param.cpp Fri Nov 18 12:06:08 2016 +0530 @@ -1126,7 +1126,7 @@ "QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1"); CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && param->maxTUSize != 8 && param->maxTUSize != 4), "max TU size must be 4, 8, 16, or 32"); - CHECK(param->limitTU > 2, "Invalid limit-tu option, limit-TU must be 0, 1 or 2"); + CHECK(param->limitTU > 3, "Invalid limit-tu option, limit-TU must be between 0 and 3"); CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater."); CHECK(param->maxNumMergeCand > 5, "MaxNumMergeCand must be 5 or smaller."); diff -r c5295126f248 -r 40a0a322b26f source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Fri Nov 18 11:49:05 2016 +0530 +++ b/source/encoder/analysis.cpp Fri Nov 18 12:06:08 2016 +0530 @@ -203,6 +203,57 @@ return *m_modeDepth[0].bestMode; } +int32_t Analysis::loadTUDepth(CUGeom cuGeom, CUData parentCTU) +{ + float predDepth = 0; + CUData* neighbourCU; + uint8_t count = 0; + int32_t maxTUDepth = -1; + neighbourCU = m_slice->m_refFrameList[0][0]->m_encData->m_picCTU; + 
predDepth += neighbourCU->m_refTuDepth[cuGeom.geomRecurId]; + count++; + if (m_slice->isInterB()) + { + neighbourCU = m_slice->m_refFrameList[1][0]->m_encData->m_picCTU; + predDepth += neighbourCU->m_refTuDepth[cuGeom.geomRecurId]; + count++; + } + if (parentCTU.m_cuAbove) + { + predDepth += parentCTU.m_cuAbove->m_refTuDepth[cuGeom.geomRecurId]; + count++; + if (parentCTU.m_cuAboveLeft) + { + predDepth += parentCTU.m_cuAboveLeft->m_refTuDepth[cuGeom.geomRecurId]; + count++; + } + if (parentCTU.m_cuAboveRight) + { + predDepth += parentCTU.m_cuAboveRight->m_refTuDepth[cuGeom.geomRecurId]; + count++; + } + } + if (parentCTU.m_cuLeft) + { + predDepth += parentCTU.m_cuLeft->m_refTuDepth[cuGeom.geomRecurId]; + count++; + } + predDepth /= count; + + if (predDepth == 0) + maxTUDepth = 0; + else if (predDepth < 1) + maxTUDepth = 1; + else if (predDepth >= 1 && predDepth <= 1.5) + maxTUDepth = 2; + else if (predDepth > 1.5 && predDepth <= 2.5) + maxTUDepth = 3; + else + maxTUDepth = -1; + + return maxTUDepth; +} + void Analysis::tryLossless(const CUGeom& cuGeom) { ModeDepth& md = m_modeDepth[cuGeom.depth]; @@ -326,6 +377,15 @@ checkBestMode(md.pred[PRED_INTRA_NxN], depth); } + if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4) + { + CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr); + int8_t maxTUDepth = -1; + for (uint32_t i = 0; i < cuGeom.numPartitions; i++) + maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]); + ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth; + } + if (m_bTryLossless) tryLossless(cuGeom); @@ -894,6 +954,9 @@ bool skipRectAmp = false; bool chooseMerge = false; + if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4) + m_maxTUDepth = loadTUDepth(cuGeom, parentCTU); + SplitData splitData[4]; splitData[0].initSplitCUData(); splitData[1].initSplitCUData(); @@ -1400,6 +1463,18 @@ if (m_param->rdLevel) md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, cuGeom.absPartIdx); + if ((m_limitTU & 
X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4) + { + if (mightNotSplit) + { + CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr); + int8_t maxTUDepth = -1; + for (uint32_t i = 0; i < cuGeom.numPartitions; i++) + maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]); + ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth; + } + } + return splitCUData; } @@ -1424,6 +1499,9 @@ md.pred[PRED_2Nx2N].rdCost = 0; } + if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4) + m_maxTUDepth = loadTUDepth(cuGeom, parentCTU); + SplitData splitData[4]; splitData[0].initSplitCUData(); splitData[1].initSplitCUData(); @@ -1751,6 +1829,18 @@ addSplitFlagCost(*md.bestMode, cuGeom.depth); } + if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4) + { + if (mightNotSplit) + { + CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr); + int8_t maxTUDepth = -1; + for (uint32_t i = 0; i < cuGeom.numPartitions; i++) + maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]); + ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth; + } + } + /* compare split RD cost against best cost */ if (mightSplit && !skipRecursion) checkBestMode(md.pred[PRED_SPLIT], depth); diff -r c5295126f248 -r 40a0a322b26f source/encoder/analysis.h --- a/source/encoder/analysis.h Fri Nov 18 11:49:05 2016 +0530 +++ b/source/encoder/analysis.h Fri Nov 18 12:06:08 2016 +0530 @@ -116,6 +116,7 @@ void destroy(); Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext); + int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU); protected: /* Analysis data for save/load mode, writes/reads data based on absPartIdx */ diff -r c5295126f248 -r 40a0a322b26f source/encoder/search.cpp --- a/source/encoder/search.cpp Fri Nov 18 11:49:05 2016 +0530 +++ b/source/encoder/search.cpp Fri Nov 18 12:06:08 2016 +0530 @@ -67,7 +67,7 @@ m_param = NULL; m_slice = NULL; m_frame = NULL; - m_maxTUDepth = 0; + m_maxTUDepth = -1; } bool 
Search::initSearch(const x265_param& param, ScalingList& scalingList) @@ -97,10 +97,12 @@ m_limitTU = 0; if (m_param->limitTU) { - if (m_param->limitTU == 1) - m_limitTU = X265_TU_LIMIT_BFS; - else if (m_param->limitTU == 2) + if (m_param->limitTU == 1) + m_limitTU = X265_TU_LIMIT_BFS; + else if (m_param->limitTU == 2) m_limitTU = X265_TU_LIMIT_DFS; + else if (m_param->limitTU == 3) + m_limitTU = X265_TU_LIMIT_NEIGH; } /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32 @@ -2628,11 +2630,17 @@ uint32_t tuDepthRange[2]; cu.getInterTUQtDepthRange(tuDepthRange, 0); + if (m_limitTU & X265_TU_LIMIT_NEIGH) + { + int32_t maxLog2CUSize = g_log2Size[m_param->maxCUSize]; + m_maxTUDepth = x265_clip3(maxLog2CUSize - (int32_t)tuDepthRange[1], maxLog2CUSize - (int32_t)tuDepthRange[0], m_maxTUDepth); + } + m_entropyCoder.load(m_rqt[depth].cur); - if (m_param->limitTU & X265_TU_LIMIT_DFS) - m_maxTUDepth = 0; - else if (m_param->limitTU & X265_TU_LIMIT_BFS) + if (m_limitTU & X265_TU_LIMIT_DFS) + m_maxTUDepth = -1; + else if (m_limitTU & X265_TU_LIMIT_BFS) memset(&m_cacheTU, 0, sizeof(TUInfoCache)); Cost costs; @@ -2895,7 +2903,7 @@ uint32_t ycbf = 0, ucbf = 0, vcbf = 0; for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts) { - if ((m_param->limitTU & X265_TU_LIMIT_DFS) && tuDepth == 0 && qIdx == 1) + if ((m_limitTU & X265_TU_LIMIT_DFS) && tuDepth == 0 && qIdx == 1) { // Fetch maximum TU depth of first sub partition to limit recursion of others for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++) @@ -2946,12 +2954,7 @@ bool bSaveTUData = false, bLoadTUData = false; uint32_t idx = 0; - if ((m_param->limitTU & X265_TU_LIMIT_DFS) && m_maxTUDepth) - { - uint32_t log2MaxTrSize = cuGeom.log2CUSize - m_maxTUDepth; - bCheckSplit = log2TrSize > log2MaxTrSize; - } - else if ((m_param->limitTU & X265_TU_LIMIT_BFS) && splitMore >= 0) + if ((m_limitTU & X265_TU_LIMIT_BFS) && splitMore >= 0) { if (bCheckSplit && 
bCheckFull && tuDepth) { @@ -2970,6 +2973,14 @@ } } } + else if (m_limitTU & X265_TU_LIMIT_DFS || m_limitTU & X265_TU_LIMIT_NEIGH) + { + if (bCheckSplit && m_maxTUDepth >= 0) + { + uint32_t log2MaxTrSize = cuGeom.log2CUSize - m_maxTUDepth; + bCheckSplit = log2TrSize > log2MaxTrSize; + } + } bool bSplitPresentFlag = bCheckSplit && bCheckFull; @@ -3497,7 +3508,7 @@ { if (splitCost.rdcost < fullCost.rdcost) { - if (m_param->limitTU & X265_TU_LIMIT_BFS) + if (m_limitTU & X265_TU_LIMIT_BFS) { uint32_t nextlog2TrSize = cuGeom.log2CUSize - (tuDepth + 1); bool nextSplit = nextlog2TrSize > depthRange[0]; diff -r c5295126f248 -r 40a0a322b26f source/encoder/search.h --- a/source/encoder/search.h Fri Nov 18 11:49:05 2016 +0530 +++ b/source/encoder/search.h Fri Nov 18 12:06:08 2016 +0530 @@ -277,7 +277,7 @@ uint32_t m_numLayers; uint32_t m_refLagPixels; - uint32_t m_maxTUDepth; + int32_t m_maxTUDepth; uint16_t m_limitTU; int16_t m_sliceMaxY; diff -r c5295126f248 -r 40a0a322b26f source/x265.h --- a/source/x265.h Fri Nov 18 11:49:05 2016 +0530 +++ b/source/x265.h Fri Nov 18 12:06:08 2016 +0530 @@ -357,6 +357,7 @@ #define X265_TU_LIMIT_BFS 1 #define X265_TU_LIMIT_DFS 2 +#define X265_TU_LIMIT_NEIGH 4 #define X265_BFRAME_MAX 16 #define X265_MAX_FRAME_THREADS 16 _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel