On Mon, Nov 20, 2017 at 12:53 PM, Ashok Kumar Mishra < as...@multicorewareinc.com> wrote:
> > > On Mon, Nov 20, 2017 at 11:01 AM, Pradeep Ramachandran < > prad...@multicorewareinc.com> wrote: > >> >> On Fri, Nov 17, 2017 at 7:23 PM, <prav...@multicorewareinc.com> wrote: >> >>> # HG changeset patch >>> # User Praveen Tiwari <prav...@multicorewareinc.com> >>> # Date 1510926794 -19800 >>> # Fri Nov 17 19:23:14 2017 +0530 >>> # Node ID 6b248ccb14169d2b0d5b84d50d94a153bd8f3b4f >>> # Parent 9723e8812e63ce51e38ede41f7d5edf73cad0849 >>> analysis: use AVC CU analysis-info for HEVC mode analysis >>> >> >> Pushed to default. Thanks! >> >> >>> >>> This patch work implements the functionality for anlysis-reuselevel 7, >>> here we want >>> to use AVC analysis-info for HEVC mode decision and use the depth from >>> offload >>> for AVC sizes >>> >>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.cpp >>> --- a/source/common/cudata.cpp Fri Nov 17 14:16:31 2017 +0530 >>> +++ b/source/common/cudata.cpp Fri Nov 17 19:23:14 2017 +0530 >>> @@ -201,6 +201,8 @@ >>> m_cuDepth = charBuf; charBuf += m_numPartitions; >>> m_predMode = charBuf; charBuf += m_numPartitions; /* >>> the order up to here is important in initCTU() and initSubCU() */ >>> m_partSize = charBuf; charBuf += m_numPartitions; >>> + m_skipFlag[0] = charBuf; charBuf += m_numPartitions; >>> + m_skipFlag[1] = charBuf; charBuf += m_numPartitions; >>> m_mergeFlag = charBuf; charBuf += m_numPartitions; >>> m_interDir = charBuf; charBuf += m_numPartitions; >>> m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; >>> @@ -239,6 +241,8 @@ >>> m_cuDepth = charBuf; charBuf += m_numPartitions; >>> m_predMode = charBuf; charBuf += m_numPartitions; /* >>> the order up to here is important in initCTU() and initSubCU() */ >>> m_partSize = charBuf; charBuf += m_numPartitions; >>> + m_skipFlag[0] = charBuf; charBuf += m_numPartitions; >>> + m_skipFlag[1] = charBuf; charBuf += m_numPartitions; >>> m_mergeFlag = charBuf; charBuf += m_numPartitions; >>> m_interDir = charBuf; charBuf += m_numPartitions; >>> m_mvpIdx[0] = 
charBuf; charBuf += m_numPartitions; >>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.h >>> --- a/source/common/cudata.h Fri Nov 17 14:16:31 2017 +0530 >>> +++ b/source/common/cudata.h Fri Nov 17 19:23:14 2017 +0530 >>> @@ -199,13 +199,14 @@ >>> uint8_t* m_predMode; // array of prediction modes >>> uint8_t* m_partSize; // array of partition sizes >>> uint8_t* m_mergeFlag; // array of merge flags >>> + uint8_t* m_skipFlag[2]; >>> uint8_t* m_interDir; // array of inter directions >>> uint8_t* m_mvpIdx[2]; // array of motion vector >>> predictor candidates or merge candidate indices [0] >>> uint8_t* m_tuDepth; // array of transform indices >>> uint8_t* m_transformSkip[3]; // array of transform skipping >>> flags per plane >>> uint8_t* m_cbf[3]; // array of coded block flags >>> (CBF) per plane >>> uint8_t* m_chromaIntraDir; // array of intra directions >>> (chroma) >>> - enum { BytesPerPartition = 21 }; // combined sizeof() of all >>> per-part data >>> + enum { BytesPerPartition = 23 }; // combined sizeof() of all >>> per-part data >>> >>> sse_t* m_distortion; >>> coeff_t* m_trCoeff[3]; // transformed coefficient buffer >>> per plane >>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/framedata.h >>> --- a/source/common/framedata.h Fri Nov 17 14:16:31 2017 +0530 >>> +++ b/source/common/framedata.h Fri Nov 17 19:23:14 2017 +0530 >>> @@ -195,6 +195,7 @@ >>> uint8_t* mvpIdx[2]; >>> int8_t* refIdx[2]; >>> MV* mv[2]; >>> + int64_t* sadCost; >>> }; >>> >>> struct analysis2PassFrameData >>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.cpp >>> --- a/source/encoder/analysis.cpp Fri Nov 17 14:16:31 2017 +0530 >>> +++ b/source/encoder/analysis.cpp Fri Nov 17 19:23:14 2017 +0530 >>> @@ -75,6 +75,10 @@ >>> m_reuseInterDataCTU = NULL; >>> m_reuseRef = NULL; >>> m_bHD = false; >>> + m_modeFlag[0] = false; >>> + m_modeFlag[1] = false; >>> + m_checkMergeAndSkipOnly[0] = false; >>> + m_checkMergeAndSkipOnly[1] = false; >>> m_evaluateInter = 0; >>> } >>> 
>>> @@ -247,6 +251,9 @@ >>> memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU], >>> sizeof(uint8_t) * numPartition); >>> memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU], >>> sizeof(uint8_t) * numPartition); >>> memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU], >>> sizeof(uint8_t) * numPartition); >>> + for (int list = 0; list < m_slice->isInterB() + 1; list++) >>> + memcpy(ctu.m_skipFlag[list], >>> &m_frame->m_analysisData.modeFlag[list][posCTU], sizeof(uint8_t) * >>> numPartition); >>> + >>> if ((m_slice->m_sliceType == P_SLICE || >>> m_param->bIntraInBFrames) && !m_param->bMVType) >>> { >>> analysis_intra_data* intraDataCTU = >>> (analysis_intra_data*)m_frame->m_analysisData.intraData; >>> @@ -1162,7 +1169,11 @@ >>> PicYuv& reconPic = *m_frame->m_reconPic; >>> SplitData splitCUData; >>> >>> - if ((m_param->bMVType && cuGeom.numPartitions > 16) || >>> !m_param->bMVType) >>> + bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions >>> > 16); >>> + bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 && >>> (m_modeFlag[0] || m_modeFlag[1])); >>> + bool bNooffloading = !m_param->bMVType; >>> + >>> + if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading) >>> { >>> md.bestMode = NULL; >>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF); >>> @@ -1296,7 +1307,7 @@ >>> } >>> >>> /* Step 1. 
Evaluate Merge/Skip candidates for likely >>> early-outs, if skip mode was not set above */ >>> - if (mightNotSplit && depth >= minDepth && !md.bestMode && >>> !bCtuInfoCheck) /* TODO: Re-evaluate if analysis load/save still works */ >>> + if ((mightNotSplit && depth >= minDepth && !md.bestMode && >>> !bCtuInfoCheck) || (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1]))) >>> /* TODO: Re-evaluate if analysis load/save still works */ >>> { >>> /* Compute Merge Cost */ >>> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); >>> @@ -1307,7 +1318,7 @@ >>> && md.bestMode && md.bestMode->cu.isSkipped(0); // >>> TODO: sa8d threshold per depth >>> } >>> >>> - if (md.bestMode && m_param->bEnableRecursionSkip && >>> !bCtuInfoCheck) >>> + if (md.bestMode && m_param->bEnableRecursionSkip && >>> !bCtuInfoCheck && !(m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1]))) >>> { >>> skipRecursion = md.bestMode->cu.isSkipped(0); >>> if (mightSplit && depth >= minDepth && !skipRecursion) >>> @@ -1319,6 +1330,9 @@ >>> } >>> } >>> >>> + if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <= >>> 16) >>> + skipRecursion = true; >>> + >>> /* Step 2. Evaluate each of the 4 split sub-blocks in series */ >>> if (mightSplit && !skipRecursion) >>> { >>> @@ -1374,6 +1388,10 @@ >>> splitPred->sa8dCost = >>> m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion, >>> splitPred->sa8dBits); >>> } >>> >>> + /* If analysis mode is simple do not Evaluate other modes */ >>> + if ((m_param->bMVType && cuGeom.numPartitions <= 16) && >>> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE)) >>> + mightNotSplit = !(m_checkMergeAndSkipOnly[0] || >>> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1])); >>> + >>> /* Split CUs >>> * 0 1 >>> * 2 3 */ >>> @@ -1838,7 +1856,12 @@ >>> } >>> >>> It is better to write a separate function when bMVType is enabled. When > numPartitions > 16, call compressInterCU_rd5_6(), else write code specific > to > your requirement. 
It will be much cleaner and more readable, so that the base > code will not be disturbed. > > The same applies to compressInterCU_rd0_4(). > > SplitData splitCUData; >>> - if ((m_param->bMVType && cuGeom.numPartitions > 16) || >>> !m_param->bMVType) >>> + >>> + bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions >>> > 16); >>> + bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 && >>> (m_modeFlag[0] || m_modeFlag[1])); >>> + bool bNooffloading = !m_param->bMVType; >>> + >>> + if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading) >>> { >>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF); >>> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); >>> @@ -1977,7 +2000,7 @@ >>> } >>> >>> /* Step 1. Evaluate Merge/Skip candidates for likely early-outs >>> */ >>> - if (mightNotSplit && !md.bestMode && !bCtuInfoCheck) >>> + if (mightNotSplit && !md.bestMode && !bCtuInfoCheck || >>> (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1]))) >>> { >>> md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); >>> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); >>> @@ -1993,6 +2016,9 @@ >>> skipRecursion = md.bestMode && >>> !md.bestMode->cu.getQtRootCbf(0); >>> } >>> >>> + if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <= >>> 16) >>> + skipRecursion = true; >>> + >>> // estimate split cost >>> /* Step 2. 
Evaluate each of the 4 split sub-blocks in series */ >>> if (mightSplit && !skipRecursion) >>> @@ -2045,6 +2071,10 @@ >>> checkDQPForSplitPred(*splitPred, cuGeom); >>> } >>> >>> + /* If analysis mode is simple do not Evaluate other modes */ >>> + if ((m_param->bMVType && cuGeom.numPartitions <= 16) && >>> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE)) >>> + mightNotSplit = !(m_checkMergeAndSkipOnly[0] || >>> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1])); >>> + >>> /* Split CUs >>> * 0 1 >>> * 2 3 */ >>> @@ -2479,6 +2509,22 @@ >>> checkDQPForSplitPred(*md.bestMode, cuGeom); >>> } >>> >>> + if (m_param->bMVType && m_param->analysisReuseLevel == 7) >>> + { >>> + for (int list = 0; list < m_slice->isInterB() + 1; list++) >>> + { >>> + m_modeFlag[list] = true; >>> + if (parentCTU.m_skipFlag[list][cuGeom.absPartIdx] == 1 >>> && cuGeom.numPartitions <= 16) >>> + m_checkMergeAndSkipOnly[list] = true; >>> + } >>> + m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU, >>> cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp); >>> + for (int list = 0; list < m_slice->isInterB() + 1; list++) >>> + { >>> + m_modeFlag[list] = false; >>> + m_checkMergeAndSkipOnly[list] = false; >>> + } >>> + } >>> + >>> if (m_param->interRefine > 1 || (m_param->interRefine && >>> parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP && >>> !mode.cu.isSkipped(0))) >>> { >>> m_evaluateInter = 1; >>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.h >>> --- a/source/encoder/analysis.h Fri Nov 17 14:16:31 2017 +0530 >>> +++ b/source/encoder/analysis.h Fri Nov 17 19:23:14 2017 +0530 >>> @@ -110,6 +110,9 @@ >>> bool m_bChromaSa8d; >>> bool m_bHD; >>> >>> + bool m_modeFlag[2]; >>> + bool m_checkMergeAndSkipOnly[2]; >>> + >>> Analysis(); >>> >>> bool create(ThreadLocalData* tld); >>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/encoder.cpp >>> --- a/source/encoder/encoder.cpp Fri Nov 17 14:16:31 2017 +0530 >>> +++ 
b/source/encoder/encoder.cpp Fri Nov 17 19:23:14 2017 +0530 >>> @@ -48,6 +48,12 @@ >>> const char g_sliceTypeToChar[] = {'B', 'P', 'I'}; >>> } >>> >>> +/* Threshold for motion vection, based on expermental result. >>> + * TODO: come up an algorithm for adoptive threshold */ >>> + >>> +#define MVTHRESHOLD 10 >>> +#define PU_2Nx2N 1 >>> >> MVTHRESHOLD is not used anywhere, so please remove it. We already have an enum, PartSize, for the different PU sizes; please use it. > + >>> static const char* defaultAnalysisFileName = "x265_analysis.dat"; >>> >>> using namespace X265_NS; >>> @@ -565,6 +571,14 @@ >>> (interData)->mvpIdx[k][cuPos + cuOffset] = >>> (srcInterData)->mvpIdx[k][(mbIndex * 16) + cuOffset]; >>> (interData)->refIdx[k][cuPos + cuOffset] = >>> (srcInterData)->refIdx[k][(mbIndex * 16) + cuOffset]; >>> memcpy(&(interData)->mv[k][cuPos + >>> cuOffset], &(srcInterData)->mv[k][(mbIndex * 16) + cuOffset], >>> sizeof(MV)); >>> + if (m_param->analysisReuseLevel == 7) >>> + { >>> + int mv_x = ((analysis_inter_data >>> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) + >>> cuOffset].x; >>> + int mv_y = ((analysis_inter_data >>> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) + >>> cuOffset].y; >>> + double mv = sqrt(mv_x*mv_x + mv_y*mv_y); >>> + if (numPU == PU_2Nx2N && >>> ((srcInterData)->depth[cuPos + cuOffset] == (m_param->maxCUSize >> 5)) && >>> mv <= MVTHRESHOLD) >>> + >>> memset(&curFrame->m_analysisData.modeFlag[k][cuPos >>> + cuOffset], 1, bytes); >>> + } >>> } >>> } >>> } >>> @@ -624,6 +638,7 @@ >>> int bytes = curFrame->m_analysisData.numPartitions >>> >> ((interData)->depth[d] * 2); >>> memset(&(currInterData)->depth[count], >>> (interData)->depth[d], bytes); >>> memset(&(currInterData)->modes[count], >>> (interData)->modes[d], bytes); >>> + memcpy(&(currInterData)->sadCost[count], >>> &((analysis_inter_data*)analysis_data->interData)->sadCost[d], bytes); >>> if (m_param->analysisReuseLevel > 4) >>> { >>> 
memset(&(currInterData)->partSize[count], >>> (interData)->partSize[d], bytes); >>> @@ -639,6 +654,14 @@ >>> (currInterData)->mvpIdx[i][count + >>> pu] = (interData)->mvpIdx[i][d]; >>> (currInterData)->refIdx[i][count + >>> pu] = (interData)->refIdx[i][d]; >>> memcpy(&(currInterData)->mv[i][count >>> + pu], &(interData)->mv[i][d], sizeof(MV)); >>> + if (m_param->analysisReuseLevel == >>> 7) >>> + { >>> + int mv_x = >>> ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count >>> + pu].x; >>> + int mv_y = >>> ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count >>> + pu].y; >>> >> We already have a local copy, analysis_inter_data * interData = (analysis_inter_data *)analysis_data->interData; please use it. + double mv = sqrt(mv_x*mv_x + >>> mv_y*mv_y); >>> + if (numPU == PU_2Nx2N && >>> m_param->num4x4Partitions <= 16 && mv <= MVTHRESHOLD) >>> + >>> memset(&curFrame->m_analysisData.modeFlag[i][count + pu], 1, bytes); >>> + } >>> } >>> } >>> } >>> @@ -3116,12 +3139,14 @@ >>> if (m_param->analysisReuseLevel >= 7) >>> { >>> X265_FREE(((analysis_inter_da >>> ta*)analysis->interData)->interDir); >>> + X265_FREE(((analysis_inter_dat >>> a*)analysis->interData)->sadCost); >>> int numDir = analysis->sliceType == X265_TYPE_P ? 
1 : 2; >>> for (int dir = 0; dir < numDir; dir++) >>> { >>> X265_FREE(((analysis_inter_da >>> ta*)analysis->interData)->mvpIdx[dir]); >>> X265_FREE(((analysis_inter_da >>> ta*)analysis->interData)->refIdx[dir]); >>> X265_FREE(((analysis_inter_da >>> ta*)analysis->interData)->mv[dir]); >>> + X265_FREE(analysis->modeFlag[dir]); >>> } >>> } >>> else >>> diff -r 9723e8812e63 -r 6b248ccb1416 source/x265.h >>> --- a/source/x265.h Fri Nov 17 14:16:31 2017 +0530 >>> +++ b/source/x265.h Fri Nov 17 19:23:14 2017 +0530 >>> @@ -123,6 +123,7 @@ >>> void* intraData; >>> uint32_t numCuInHeight; >>> x265_lookahead_data lookahead; >>> + uint8_t* modeFlag[2]; >>> } x265_analysis_data; >>> >>> /* cu statistics */ >>> _______________________________________________ >>> x265-devel mailing list >>> x265-devel@videolan.org >>> https://mailman.videolan.org/listinfo/x265-devel >>> >> >> >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel >> >> >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel