On Fri, May 25, 2018 at 4:31 PM, <bha...@multicorewareinc.com> wrote:
> # HG changeset patch > # User Bhavna Hariharan <bha...@multicorewareinc.com> > # Date 1527165877 -19800 > # Thu May 24 18:14:37 2018 +0530 > # Node ID 77d698d854fab725682213c9a39ac91aa632095f > # Parent cc2c5e46f3c87d27e3602af30b06ba6a0fbe2704 > Clean up dynamic refinement > > This patch does the following: > 1) Earlier, locks were used to avoid the possibility of race conditions > while > copying data from CTU level to frame level. Now, the data is collected for > each > row and when the entire frame completes analysis the row data is copied to > the > frame. This method eliminates the possibility of a race condition without > having to employ locks. > 2) Allocate memory for the CTU information from the data pool, this will > avoid > fragmentation of data. > > diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/common.h > --- a/source/common/common.h Mon May 21 18:42:29 2018 +0530 > +++ b/source/common/common.h Thu May 24 18:14:37 2018 +0530 > @@ -332,6 +332,8 @@ > #define START_CODE_OVERHEAD 3 > #define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1) > > +#define MAX_NUM_DYN_REFINE (NUM_CU_DEPTH * > X265_REFINE_INTER_LEVELS) > + > namespace X265_NS { > > enum { SAO_NUM_OFFSET = 4 }; > diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/cudata.cpp > --- a/source/common/cudata.cpp Mon May 21 18:42:29 2018 +0530 > +++ b/source/common/cudata.cpp Thu May 24 18:14:37 2018 +0530 > @@ -317,16 +317,6 @@ > m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? > m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL; > m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - > 1))) ? 
m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL; > memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); > - > - if (m_encData->m_param->bDynamicRefine) > - { > - int size = m_encData->m_param->maxCUDepth * > X265_REFINE_INTER_LEVELS; > - CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size); > - CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size); > - CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size); > - } > -fail: > - return; > } > > // initialize Sub partition > diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/cudata.h > --- a/source/common/cudata.h Mon May 21 18:42:29 2018 +0530 > +++ b/source/common/cudata.h Thu May 24 18:14:37 2018 +0530 > @@ -353,8 +353,12 @@ > coeff_t* trCoeffMemBlock; > MV* mvMemBlock; > sse_t* distortionMemBlock; > + uint64_t* dynRefineRdBlock; > + uint32_t* dynRefCntBlock; > + uint32_t* dynRefVarBlock; > > - CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; > mvMemBlock = NULL; distortionMemBlock = NULL; } > + CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; > mvMemBlock = NULL; distortionMemBlock = NULL; > + dynRefineRdBlock = NULL; dynRefCntBlock = NULL; > dynRefVarBlock = NULL;} > > bool create(uint32_t depth, uint32_t csp, uint32_t numInstances, > const x265_param& param) > { > diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/framedata.cpp > --- a/source/common/framedata.cpp Mon May 21 18:42:29 2018 +0530 > +++ b/source/common/framedata.cpp Thu May 24 18:14:37 2018 +0530 > @@ -41,9 +41,25 @@ > if (param.rc.bStatWrite) > m_spsrps = const_cast<RPS*>(sps.spsrps); > bool isallocated = m_cuMemPool.create(0, param.internalCsp, > sps.numCUsInFrame, param); > + if (m_param->bDynamicRefine) > + { > + CHECKED_MALLOC_ZERO(m_cuMemPool.dynRefineRdBlock, uint64_t, > MAX_NUM_DYN_REFINE * sps.numCUsInFrame); > + CHECKED_MALLOC_ZERO(m_cuMemPool.dynRefCntBlock, uint32_t, > MAX_NUM_DYN_REFINE * sps.numCUsInFrame); > + CHECKED_MALLOC_ZERO(m_cuMemPool.dynRefVarBlock, uint32_t, > MAX_NUM_DYN_REFINE * 
sps.numCUsInFrame); > + } > if (isallocated) > + { > for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++) > + { > + if (m_param->bDynamicRefine) > + { > + m_picCTU[ctuAddr].m_collectCURd = > m_cuMemPool.dynRefineRdBlock + (ctuAddr * MAX_NUM_DYN_REFINE); > + m_picCTU[ctuAddr].m_collectCUVariance = > m_cuMemPool.dynRefVarBlock + (ctuAddr * MAX_NUM_DYN_REFINE); > + m_picCTU[ctuAddr].m_collectCUCount = > m_cuMemPool.dynRefCntBlock + (ctuAddr * MAX_NUM_DYN_REFINE); > + } > m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param, ctuAddr); > + } > + } > else > return false; > CHECKED_MALLOC_ZERO(m_cuStat, RCStatCU, sps.numCUsInFrame); > @@ -65,6 +81,12 @@ > { > memset(m_cuStat, 0, sps.numCUsInFrame * sizeof(*m_cuStat)); > memset(m_rowStat, 0, sps.numCuInHeight * sizeof(*m_rowStat)); > + if (m_param->bDynamicRefine) > + { > + memset(m_picCTU->m_collectCURd, 0, MAX_NUM_DYN_REFINE * > sizeof(uint64_t)); > + memset(m_picCTU->m_collectCUVariance, 0, MAX_NUM_DYN_REFINE * > sizeof(uint32_t)); > + memset(m_picCTU->m_collectCUCount, 0, MAX_NUM_DYN_REFINE * > sizeof(uint32_t)); > + } > } > > void FrameData::destroy() > @@ -75,6 +97,12 @@ > > m_cuMemPool.destroy(); > > + if (m_param->bDynamicRefine) > + { > + X265_FREE(m_cuMemPool.dynRefineRdBlock); > + X265_FREE(m_cuMemPool.dynRefCntBlock); > + X265_FREE(m_cuMemPool.dynRefVarBlock); > + } > X265_FREE(m_cuStat); > X265_FREE(m_rowStat); > for (int i = 0; i < INTEGRAL_PLANE_NUM; i++) > diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/framedata.h > --- a/source/common/framedata.h Mon May 21 18:42:29 2018 +0530 > +++ b/source/common/framedata.h Thu May 24 18:14:37 2018 +0530 > @@ -88,6 +88,11 @@ > uint64_t cntInterPu[NUM_CU_DEPTH][INTER_MODES - 1]; > uint64_t cntMergePu[NUM_CU_DEPTH][INTER_MODES - 1]; > > + /* Feature values per row for dynamic refinement */ > + uint64_t rowRdDyn[MAX_NUM_DYN_REFINE]; > + uint32_t rowVarDyn[MAX_NUM_DYN_REFINE]; > + uint32_t rowCntDyn[MAX_NUM_DYN_REFINE]; > + > FrameStats() > { > memset(this, 
0, sizeof(FrameStats)); > diff -r cc2c5e46f3c8 -r 77d698d854fa source/encoder/frameencoder.cpp > --- a/source/encoder/frameencoder.cpp Mon May 21 18:42:29 2018 +0530 > +++ b/source/encoder/frameencoder.cpp Thu May 24 18:14:37 2018 +0530 > @@ -956,6 +956,9 @@ > } > } // end of (m_param->maxSlices > 1) > > + if (m_param->bDynamicRefine && m_top->m_startPoint <= > m_frame->m_encodeOrder) //Avoid collecting data that will not be used by > future frames. > + collectDynDataFrame(); > + > if (m_param->rc.bStatWrite) > { > int totalI = 0, totalP = 0, totalSkip = 0; > @@ -1494,31 +1497,12 @@ > > // Does all the CU analysis, returns best top level mode decision > Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, > m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder); > - if (m_param->bDynamicRefine) > - { > - if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid > collecting data that will not be used by future frames. > - { > - ScopedLock dynLock(m_top->m_dynamicRefineLock); > - for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) > - { > - for (uint32_t depth = 0; depth < m_param->maxCUDepth; > depth++) > - { > - int offset = (depth * X265_REFINE_INTER_LEVELS) + > i; > - int curFrameIndex = m_frame->m_encodeOrder - > m_top->m_startPoint; > - int index = (curFrameIndex * > X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset; > - if (ctu->m_collectCUCount[offset]) > - { > - m_top->m_variance[index] += > ctu->m_collectCUVariance[offset]; > - m_top->m_rdCost[index] += > ctu->m_collectCURd[offset]; > - m_top->m_trainingCount[index] += > ctu->m_collectCUCount[offset]; > - } > - } > - } > - } > - X265_FREE_ZERO(ctu->m_collectCUVariance); > - X265_FREE_ZERO(ctu->m_collectCURd); > - X265_FREE_ZERO(ctu->m_collectCUCount); > - } > + > + /* startPoint > encodeOrder is true when the start point changes > for > + a new GOP but few frames from the previous GOP is still > incomplete. > + The data of frames in this interval will not be used by any > future frames. 
*/ > + if (m_param->bDynamicRefine && m_top->m_startPoint <= > m_frame->m_encodeOrder) > + collectDynDataRow(*ctu, &curRow.rowStats); > > // take a sample of the current active worker count > ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount); > @@ -1901,6 +1885,46 @@ > if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows) > m_completionEvent.trigger(); > } > + > +void FrameEncoder::collectDynDataRow(CUData& ctu, FrameStats* rowStats) > +{ > + for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) > + { > + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++) > + { > + int offset = (depth * X265_REFINE_INTER_LEVELS) + i; > + if (ctu.m_collectCUCount[offset]) > + { > + rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[ > offset]; > + rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset]; > + rowStats->rowCntDyn[offset] += > ctu.m_collectCUCount[offset]; > + } > + } > + } > +} > + > +void FrameEncoder::collectDynDataFrame() > +{ > + for (uint32_t row = 0; row < m_numRows; row++) > + { > + for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; > refLevel++) > + { > + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++) > + { > + int offset = (depth * X265_REFINE_INTER_LEVELS) + > refLevel; > + int curFrameIndex = m_frame->m_encodeOrder - > m_top->m_startPoint; > + int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * > m_param->maxCUDepth) + offset; > + if (m_rows[row].rowStats.rowCntDyn[offset]) > + { > + m_top->m_variance[index] += m_rows[row].rowStats. > rowVarDyn[offset]; > + m_top->m_rdCost[index] += > m_rows[row].rowStats.rowRdDyn[offset]; > + m_top->m_trainingCount[index] += m_rows[row].rowStats. 
> rowCntDyn[offset]; > + } > + } > + } > + } > +} > + > void FrameEncoder::computeAvgTrainingData() > { > if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe) > diff -r cc2c5e46f3c8 -r 77d698d854fa source/encoder/frameencoder.h > --- a/source/encoder/frameencoder.h Mon May 21 18:42:29 2018 +0530 > +++ b/source/encoder/frameencoder.h Thu May 24 18:14:37 2018 +0530 > @@ -243,6 +243,8 @@ > #if ENABLE_LIBVMAF > void vmafFrameLevelScore(); > #endif > + void collectDynDataRow(CUData& ctu, FrameStats* rowStats); > + void collectDynDataFrame(); > }; > } > > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > > Pushed.
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel