# HG changeset patch # User Praveen Tiwari <prav...@multicorewareinc.com> # Date 1510926794 -19800 # Fri Nov 17 19:23:14 2017 +0530 # Node ID 6b248ccb14169d2b0d5b84d50d94a153bd8f3b4f # Parent 9723e8812e63ce51e38ede41f7d5edf73cad0849 analysis: use AVC CU analysis-info for HEVC mode analysis
This patch implements the functionality for analysis-reuse-level 7: here we want to use AVC analysis-info for HEVC mode decision and use the depth from offload for AVC sizes. diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.cpp --- a/source/common/cudata.cpp Fri Nov 17 14:16:31 2017 +0530 +++ b/source/common/cudata.cpp Fri Nov 17 19:23:14 2017 +0530 @@ -201,6 +201,8 @@ m_cuDepth = charBuf; charBuf += m_numPartitions; m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ m_partSize = charBuf; charBuf += m_numPartitions; + m_skipFlag[0] = charBuf; charBuf += m_numPartitions; + m_skipFlag[1] = charBuf; charBuf += m_numPartitions; m_mergeFlag = charBuf; charBuf += m_numPartitions; m_interDir = charBuf; charBuf += m_numPartitions; m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; @@ -239,6 +241,8 @@ m_cuDepth = charBuf; charBuf += m_numPartitions; m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ m_partSize = charBuf; charBuf += m_numPartitions; + m_skipFlag[0] = charBuf; charBuf += m_numPartitions; + m_skipFlag[1] = charBuf; charBuf += m_numPartitions; m_mergeFlag = charBuf; charBuf += m_numPartitions; m_interDir = charBuf; charBuf += m_numPartitions; m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.h --- a/source/common/cudata.h Fri Nov 17 14:16:31 2017 +0530 +++ b/source/common/cudata.h Fri Nov 17 19:23:14 2017 +0530 @@ -199,13 +199,14 @@ uint8_t* m_predMode; // array of prediction modes uint8_t* m_partSize; // array of partition sizes uint8_t* m_mergeFlag; // array of merge flags + uint8_t* m_skipFlag[2]; uint8_t* m_interDir; // array of inter directions uint8_t* m_mvpIdx[2]; // array of motion vector predictor candidates or merge candidate indices [0] uint8_t* m_tuDepth; // array of transform indices uint8_t* m_transformSkip[3]; // array of transform skipping 
flags per plane uint8_t* m_cbf[3]; // array of coded block flags (CBF) per plane uint8_t* m_chromaIntraDir; // array of intra directions (chroma) - enum { BytesPerPartition = 21 }; // combined sizeof() of all per-part data + enum { BytesPerPartition = 23 }; // combined sizeof() of all per-part data sse_t* m_distortion; coeff_t* m_trCoeff[3]; // transformed coefficient buffer per plane diff -r 9723e8812e63 -r 6b248ccb1416 source/common/framedata.h --- a/source/common/framedata.h Fri Nov 17 14:16:31 2017 +0530 +++ b/source/common/framedata.h Fri Nov 17 19:23:14 2017 +0530 @@ -195,6 +195,7 @@ uint8_t* mvpIdx[2]; int8_t* refIdx[2]; MV* mv[2]; + int64_t* sadCost; }; struct analysis2PassFrameData diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Fri Nov 17 14:16:31 2017 +0530 +++ b/source/encoder/analysis.cpp Fri Nov 17 19:23:14 2017 +0530 @@ -75,6 +75,10 @@ m_reuseInterDataCTU = NULL; m_reuseRef = NULL; m_bHD = false; + m_modeFlag[0] = false; + m_modeFlag[1] = false; + m_checkMergeAndSkipOnly[0] = false; + m_checkMergeAndSkipOnly[1] = false; m_evaluateInter = 0; } @@ -247,6 +251,9 @@ memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU], sizeof(uint8_t) * numPartition); memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition); memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU], sizeof(uint8_t) * numPartition); + for (int list = 0; list < m_slice->isInterB() + 1; list++) + memcpy(ctu.m_skipFlag[list], &m_frame->m_analysisData.modeFlag[list][posCTU], sizeof(uint8_t) * numPartition); + if ((m_slice->m_sliceType == P_SLICE || m_param->bIntraInBFrames) && !m_param->bMVType) { analysis_intra_data* intraDataCTU = (analysis_intra_data*)m_frame->m_analysisData.intraData; @@ -1162,7 +1169,11 @@ PicYuv& reconPic = *m_frame->m_reconPic; SplitData splitCUData; - if ((m_param->bMVType && cuGeom.numPartitions > 16) || !m_param->bMVType) + bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions > 
16); + bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])); + bool bNooffloading = !m_param->bMVType; + + if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading) { md.bestMode = NULL; bool mightSplit = !(cuGeom.flags & CUGeom::LEAF); @@ -1296,7 +1307,7 @@ } /* Step 1. Evaluate Merge/Skip candidates for likely early-outs, if skip mode was not set above */ - if (mightNotSplit && depth >= minDepth && !md.bestMode && !bCtuInfoCheck) /* TODO: Re-evaluate if analysis load/save still works */ + if ((mightNotSplit && depth >= minDepth && !md.bestMode && !bCtuInfoCheck) || (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1]))) /* TODO: Re-evaluate if analysis load/save still works */ { /* Compute Merge Cost */ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); @@ -1307,7 +1318,7 @@ && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth } - if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck) + if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1]))) { skipRecursion = md.bestMode->cu.isSkipped(0); if (mightSplit && depth >= minDepth && !skipRecursion) @@ -1319,6 +1330,9 @@ } } + if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <= 16) + skipRecursion = true; + /* Step 2. 
Evaluate each of the 4 split sub-blocks in series */ if (mightSplit && !skipRecursion) { @@ -1374,6 +1388,10 @@ splitPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion, splitPred->sa8dBits); } + /* If analysis mode is simple do not Evaluate other modes */ + if ((m_param->bMVType && cuGeom.numPartitions <= 16) && (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE)) + mightNotSplit = !(m_checkMergeAndSkipOnly[0] || (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1])); + /* Split CUs * 0 1 * 2 3 */ @@ -1838,7 +1856,12 @@ } SplitData splitCUData; - if ((m_param->bMVType && cuGeom.numPartitions > 16) || !m_param->bMVType) + + bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions > 16); + bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])); + bool bNooffloading = !m_param->bMVType; + + if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading) { bool mightSplit = !(cuGeom.flags & CUGeom::LEAF); bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); @@ -1977,7 +2000,7 @@ } /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */ - if (mightNotSplit && !md.bestMode && !bCtuInfoCheck) + if (mightNotSplit && !md.bestMode && !bCtuInfoCheck || (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1]))) { md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); @@ -1993,6 +2016,9 @@ skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); } + if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <= 16) + skipRecursion = true; + // estimate split cost /* Step 2. 
Evaluate each of the 4 split sub-blocks in series */ if (mightSplit && !skipRecursion) @@ -2045,6 +2071,10 @@ checkDQPForSplitPred(*splitPred, cuGeom); } + /* If analysis mode is simple do not Evaluate other modes */ + if ((m_param->bMVType && cuGeom.numPartitions <= 16) && (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE)) + mightNotSplit = !(m_checkMergeAndSkipOnly[0] || (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1])); + /* Split CUs * 0 1 * 2 3 */ @@ -2479,6 +2509,22 @@ checkDQPForSplitPred(*md.bestMode, cuGeom); } + if (m_param->bMVType && m_param->analysisReuseLevel == 7) + { + for (int list = 0; list < m_slice->isInterB() + 1; list++) + { + m_modeFlag[list] = true; + if (parentCTU.m_skipFlag[list][cuGeom.absPartIdx] == 1 && cuGeom.numPartitions <= 16) + m_checkMergeAndSkipOnly[list] = true; + } + m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU, cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp); + for (int list = 0; list < m_slice->isInterB() + 1; list++) + { + m_modeFlag[list] = false; + m_checkMergeAndSkipOnly[list] = false; + } + } + if (m_param->interRefine > 1 || (m_param->interRefine && parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP && !mode.cu.isSkipped(0))) { m_evaluateInter = 1; diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.h --- a/source/encoder/analysis.h Fri Nov 17 14:16:31 2017 +0530 +++ b/source/encoder/analysis.h Fri Nov 17 19:23:14 2017 +0530 @@ -110,6 +110,9 @@ bool m_bChromaSa8d; bool m_bHD; + bool m_modeFlag[2]; + bool m_checkMergeAndSkipOnly[2]; + Analysis(); bool create(ThreadLocalData* tld); diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Fri Nov 17 14:16:31 2017 +0530 +++ b/source/encoder/encoder.cpp Fri Nov 17 19:23:14 2017 +0530 @@ -48,6 +48,12 @@ const char g_sliceTypeToChar[] = {'B', 'P', 'I'}; } +/* Threshold for motion vector, based on experimental result. 
+ * TODO: come up with an algorithm for adaptive threshold */ + +#define MVTHRESHOLD 10 +#define PU_2Nx2N 1 + static const char* defaultAnalysisFileName = "x265_analysis.dat"; using namespace X265_NS; @@ -565,6 +571,14 @@ (interData)->mvpIdx[k][cuPos + cuOffset] = (srcInterData)->mvpIdx[k][(mbIndex * 16) + cuOffset]; (interData)->refIdx[k][cuPos + cuOffset] = (srcInterData)->refIdx[k][(mbIndex * 16) + cuOffset]; memcpy(&(interData)->mv[k][cuPos + cuOffset], &(srcInterData)->mv[k][(mbIndex * 16) + cuOffset], sizeof(MV)); + if (m_param->analysisReuseLevel == 7) + { + int mv_x = ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) + cuOffset].x; + int mv_y = ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) + cuOffset].y; + double mv = sqrt(mv_x*mv_x + mv_y*mv_y); + if (numPU == PU_2Nx2N && ((srcInterData)->depth[cuPos + cuOffset] == (m_param->maxCUSize >> 5)) && mv <= MVTHRESHOLD) + memset(&curFrame->m_analysisData.modeFlag[k][cuPos + cuOffset], 1, bytes); + } } } } @@ -624,6 +638,7 @@ int bytes = curFrame->m_analysisData.numPartitions >> ((interData)->depth[d] * 2); memset(&(currInterData)->depth[count], (interData)->depth[d], bytes); memset(&(currInterData)->modes[count], (interData)->modes[d], bytes); + memcpy(&(currInterData)->sadCost[count], &((analysis_inter_data*)analysis_data->interData)->sadCost[d], bytes); if (m_param->analysisReuseLevel > 4) { memset(&(currInterData)->partSize[count], (interData)->partSize[d], bytes); @@ -639,6 +654,14 @@ (currInterData)->mvpIdx[i][count + pu] = (interData)->mvpIdx[i][d]; (currInterData)->refIdx[i][count + pu] = (interData)->refIdx[i][d]; memcpy(&(currInterData)->mv[i][count + pu], &(interData)->mv[i][d], sizeof(MV)); + if (m_param->analysisReuseLevel == 7) + { + int mv_x = ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count + pu].x; + int mv_y = ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count + pu].y; + double mv = 
sqrt(mv_x*mv_x + mv_y*mv_y); + if (numPU == PU_2Nx2N && m_param->num4x4Partitions <= 16 && mv <= MVTHRESHOLD) + memset(&curFrame->m_analysisData.modeFlag[i][count + pu], 1, bytes); + } } } } @@ -3116,12 +3139,14 @@ if (m_param->analysisReuseLevel >= 7) { X265_FREE(((analysis_inter_data*)analysis->interData)->interDir); + X265_FREE(((analysis_inter_data*)analysis->interData)->sadCost); int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2; for (int dir = 0; dir < numDir; dir++) { X265_FREE(((analysis_inter_data*)analysis->interData)->mvpIdx[dir]); X265_FREE(((analysis_inter_data*)analysis->interData)->refIdx[dir]); X265_FREE(((analysis_inter_data*)analysis->interData)->mv[dir]); + X265_FREE(analysis->modeFlag[dir]); } } else diff -r 9723e8812e63 -r 6b248ccb1416 source/x265.h --- a/source/x265.h Fri Nov 17 14:16:31 2017 +0530 +++ b/source/x265.h Fri Nov 17 19:23:14 2017 +0530 @@ -123,6 +123,7 @@ void* intraData; uint32_t numCuInHeight; x265_lookahead_data lookahead; + uint8_t* modeFlag[2]; } x265_analysis_data; /* cu statistics */ _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel