# HG changeset patch # User Sagar Kotecha<sa...@multicorewareinc.com> # Date 1458817615 -19800 # Thu Mar 24 16:36:55 2016 +0530 # Node ID 5bccf2596d8a1d66a6a9d460e65b1b9b93c2d112 # Parent 2de6cb99313a03c3577934ac5e2e116f7ba6cd10 analysis: skip rect/amp in analysis load mode
Avoid doing rect/amp analysis in load mode if the save mode has not chosen it as the best partition diff -r 2de6cb99313a -r 5bccf2596d8a source/common/framedata.h --- a/source/common/framedata.h Mon Mar 21 13:50:14 2016 +0530 +++ b/source/common/framedata.h Thu Mar 24 16:36:55 2016 +0530 @@ -172,6 +172,8 @@ int32_t* ref; uint8_t* depth; uint8_t* modes; + uint8_t* partSize; + uint8_t* mergeFlag; }; } #endif // ifndef X265_FRAMEDATA_H diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Mar 21 13:50:14 2016 +0530 +++ b/source/encoder/analysis.cpp Thu Mar 24 16:36:55 2016 +0530 @@ -149,6 +149,8 @@ m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir]; m_reuseDepth = &m_reuseInterDataCTU->depth[ctu.m_cuAddr * ctu.m_numPartitions]; m_reuseModes = &m_reuseInterDataCTU->modes[ctu.m_cuAddr * ctu.m_numPartitions]; + m_reusePartSize = &m_reuseInterDataCTU->partSize[ctu.m_cuAddr * ctu.m_numPartitions]; + m_reuseMergeFlag = &m_reuseInterDataCTU->mergeFlag[ctu.m_cuAddr * ctu.m_numPartitions]; if (m_param->analysisMode == X265_ANALYSIS_SAVE) for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir; i++) m_reuseRef[i] = -1; @@ -885,6 +887,8 @@ uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom); bool earlyskip = false; bool splitIntra = true; + bool skipRectAmp = false; + bool chooseMerge = false; SplitData splitData[4]; splitData[0].initSplitCUData(); @@ -903,15 +907,26 @@ bool foundSkip = false; if (m_param->analysisMode == X265_ANALYSIS_LOAD) { - if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx] && m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP) + if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx]) { - md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); - md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); - checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); + if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP) + { + 
md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); + md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); + checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - foundSkip = true; - if (m_param->rdLevel) - earlyskip = md.bestMode && m_param->bEnableEarlySkip; + foundSkip = true; + if (m_param->rdLevel) + earlyskip = md.bestMode && m_param->bEnableEarlySkip; + } + if (m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N) + { + if (m_reuseModes[cuGeom.absPartIdx] != MODE_INTRA && m_reuseModes[cuGeom.absPartIdx] != 4) + { + skipRectAmp = true && !!md.bestMode; + chooseMerge = !!m_reuseMergeFlag[cuGeom.absPartIdx] && !!md.bestMode; + } + } } } @@ -1017,158 +1032,161 @@ } Mode *bestInter = &md.pred[PRED_2Nx2N]; - if (m_param->bEnableRectInter) + if (!skipRectAmp) { - uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost; - uint32_t threshold_2NxN, threshold_Nx2N; + if (m_param->bEnableRectInter) + { + uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost; + uint32_t threshold_2NxN, threshold_Nx2N; - if (m_slice->m_sliceType == P_SLICE) - { - threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0]; - threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0]; - } - else - { - threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0] - + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1; - threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] - + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1; + if (m_slice->m_sliceType == P_SLICE) + { + threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0]; + threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0]; + } + else + { + threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0] + + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1; + threshold_Nx2N = (splitData[0].mvCost[0] + 
splitData[2].mvCost[0] + + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1; + } + + int try_2NxN_first = threshold_2NxN < threshold_Nx2N; + if (try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN) + { + refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */ + refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */ + md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks); + if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost) + bestInter = &md.pred[PRED_2NxN]; + } + + if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_Nx2N) + { + refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */ + refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */ + md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks); + if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost) + bestInter = &md.pred[PRED_Nx2N]; + } + + if (!try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN) + { + refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */ + refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */ + md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks); + if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost) + bestInter = &md.pred[PRED_2NxN]; + } } - int try_2NxN_first = threshold_2NxN < threshold_Nx2N; - if (try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN) + if (m_slice->m_sps->maxAMPDepth > depth) { - refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */ - refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */ - md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks); - if 
(md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost) - bestInter = &md.pred[PRED_2NxN]; - } + uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost; + uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N; - if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_Nx2N) - { - refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */ - refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */ - md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks); - if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost) - bestInter = &md.pred[PRED_Nx2N]; - } + if (m_slice->m_sliceType == P_SLICE) + { + threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0]; + threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0]; - if (!try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN) - { - refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */ - refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */ - md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks); - if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost) - bestInter = &md.pred[PRED_2NxN]; - } - } + threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0]; + threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0]; + } + else + { + threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0] + + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1; + threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0] + + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1; - if (m_slice->m_sps->maxAMPDepth > depth) - { - uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost; - uint32_t threshold_2NxnU, 
threshold_2NxnD, threshold_nLx2N, threshold_nRx2N; - - if (m_slice->m_sliceType == P_SLICE) - { - threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0]; - threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0]; - - threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0]; - threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0]; - } - else - { - threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0] - + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1; - threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0] - + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1; - - threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] - + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1; - threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0] - + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1; - } - - bool bHor = false, bVer = false; - if (bestInter->cu.m_partSize[0] == SIZE_2NxN) - bHor = true; - else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N) - bVer = true; - else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N && - md.bestMode && md.bestMode->cu.getQtRootCbf(0)) - { - bHor = true; - bVer = true; - } - - if (bHor) - { - int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU; - if (try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD) - { - refMasks[0] = allSplitRefs; /* 75% top */ - refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */ - md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks); - if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost) - bestInter = &md.pred[PRED_2NxnD]; + threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] + + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1; + threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0] + + splitData[1].mvCost[1] + 
splitData[3].mvCost[1] + 1) >> 1; } - if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnU) + bool bHor = false, bVer = false; + if (bestInter->cu.m_partSize[0] == SIZE_2NxN) + bHor = true; + else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N) + bVer = true; + else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N && + md.bestMode && md.bestMode->cu.getQtRootCbf(0)) { - refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */ - refMasks[1] = allSplitRefs; /* 75% bot */ - md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks); - if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost) - bestInter = &md.pred[PRED_2NxnU]; + bHor = true; + bVer = true; } - if (!try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD) + if (bHor) { - refMasks[0] = allSplitRefs; /* 75% top */ - refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */ - md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks); - if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost) - bestInter = &md.pred[PRED_2NxnD]; + int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU; + if (try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD) + { + refMasks[0] = allSplitRefs; /* 75% top */ + refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */ + md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks); + if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost) + bestInter = &md.pred[PRED_2NxnD]; + } + + if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnU) + { + refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */ + refMasks[1] = allSplitRefs; /* 75% bot */ + md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd0_4(md.pred[PRED_2NxnU], 
cuGeom, SIZE_2NxnU, refMasks); + if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost) + bestInter = &md.pred[PRED_2NxnU]; + } + + if (!try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD) + { + refMasks[0] = allSplitRefs; /* 75% top */ + refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */ + md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks); + if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost) + bestInter = &md.pred[PRED_2NxnD]; + } } - } - if (bVer) - { - int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N; - if (try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N) + if (bVer) { - refMasks[0] = allSplitRefs; /* 75% left */ - refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */ - md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks); - if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost) - bestInter = &md.pred[PRED_nRx2N]; - } + int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N; + if (try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N) + { + refMasks[0] = allSplitRefs; /* 75% left */ + refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */ + md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks); + if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost) + bestInter = &md.pred[PRED_nRx2N]; + } - if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nLx2N) - { - refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left */ - refMasks[1] = allSplitRefs; /* 75% right */ - md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks); - if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost) - 
bestInter = &md.pred[PRED_nLx2N]; - } + if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nLx2N) + { + refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left */ + refMasks[1] = allSplitRefs; /* 75% right */ + md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks); + if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost) + bestInter = &md.pred[PRED_nLx2N]; + } - if (!try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N) - { - refMasks[0] = allSplitRefs; /* 75% left */ - refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */ - md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks); - if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost) - bestInter = &md.pred[PRED_nRx2N]; + if (!try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N) + { + refMasks[0] = allSplitRefs; /* 75% left */ + refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */ + md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks); + if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost) + bestInter = &md.pred[PRED_nRx2N]; + } } } } @@ -1185,15 +1203,19 @@ motionCompensation(bestInter->cu, pu, bestInter->predYuv, false, true); } } - encodeResAndCalcRdInterCU(*bestInter, cuGeom); - checkBestMode(*bestInter, depth); - /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */ - if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 && - md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17) + if (!chooseMerge) { - encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom); - checkBestMode(md.pred[PRED_BIDIR], depth); + encodeResAndCalcRdInterCU(*bestInter, cuGeom); + checkBestMode(*bestInter, depth); + + /* If BIDIR is 
available and within 17/16 of best inter option, choose by RDO */ + if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 && + md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17) + { + encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom); + checkBestMode(md.pred[PRED_BIDIR], depth); + } } if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) || @@ -1378,6 +1400,7 @@ bool foundSkip = false; bool earlyskip = false; bool splitIntra = true; + bool skipRectAmp = false; // avoid uninitialize value in below reference if (m_param->limitModes) @@ -1389,14 +1412,19 @@ if (m_param->analysisMode == X265_ANALYSIS_LOAD) { - if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx] && m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP) + if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx]) { - md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); - md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); - checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); + if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP) + { + md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); + md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); + checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - foundSkip = true; - earlyskip = !!m_param->bEnableEarlySkip; + foundSkip = true; + earlyskip = !!m_param->bEnableEarlySkip; + } + if (m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N) + skipRectAmp = true && !!md.bestMode; } } @@ -1502,150 +1530,153 @@ } } - if (m_param->bEnableRectInter) + if (!skipRectAmp) { - uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost; - uint32_t threshold_2NxN, threshold_Nx2N; + if (m_param->bEnableRectInter) + { + uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost; + uint32_t threshold_2NxN, threshold_Nx2N; - if (m_slice->m_sliceType == P_SLICE) - { - 
threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0]; - threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0]; - } - else - { - threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0] - + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1; - threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] - + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1; - } - - int try_2NxN_first = threshold_2NxN < threshold_Nx2N; - if (try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN) - { - refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */ - refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */ - md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks); - checkBestMode(md.pred[PRED_2NxN], cuGeom.depth); - } - - if (splitCost < md.bestMode->rdCost + threshold_Nx2N) - { - refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */ - refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */ - md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks); - checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth); - } - - if (!try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN) - { - refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */ - refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */ - md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks); - checkBestMode(md.pred[PRED_2NxN], cuGeom.depth); - } - } - - // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N) - if (m_slice->m_sps->maxAMPDepth > depth) - { - uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost; - uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, 
threshold_nRx2N; - - if (m_slice->m_sliceType == P_SLICE) - { - threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0]; - threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0]; - - threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0]; - threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0]; - } - else - { - threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0] + if (m_slice->m_sliceType == P_SLICE) + { + threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0]; + threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0]; + } + else + { + threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0] + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1; - threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0] - + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1; - - threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] + threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1; - threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0] - + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1; - } - - bool bHor = false, bVer = false; - if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN) - bHor = true; - else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N) - bVer = true; - else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0]) - { - bHor = true; - bVer = true; - } - - if (bHor) - { - int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU; - if (try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD) - { - refMasks[0] = allSplitRefs; /* 75% top */ - refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */ - md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks); - checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth); } - if (splitCost < 
md.bestMode->rdCost + threshold_2NxnU) + int try_2NxN_first = threshold_2NxN < threshold_Nx2N; + if (try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN) { - refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */ - refMasks[1] = allSplitRefs; /* 75% bot */ - md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks); - checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth); + refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */ + refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */ + md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks); + checkBestMode(md.pred[PRED_2NxN], cuGeom.depth); } - if (!try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD) + if (splitCost < md.bestMode->rdCost + threshold_Nx2N) { - refMasks[0] = allSplitRefs; /* 75% top */ - refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */ - md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks); - checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth); + refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */ + refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */ + md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks); + checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth); + } + + if (!try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN) + { + refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */ + refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */ + md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks); + checkBestMode(md.pred[PRED_2NxN], cuGeom.depth); } } - if 
(bVer) + // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N) + if (m_slice->m_sps->maxAMPDepth > depth) { - int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N; - if (try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N) + uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost; + uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N; + + if (m_slice->m_sliceType == P_SLICE) { - refMasks[0] = allSplitRefs; /* 75% left */ - refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */ - md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks); - checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth); + threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0]; + threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0]; + + threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0]; + threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0]; + } + else + { + threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0] + + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1; + threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0] + + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1; + + threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] + + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1; + threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0] + + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1; } - if (splitCost < md.bestMode->rdCost + threshold_nLx2N) + bool bHor = false, bVer = false; + if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN) + bHor = true; + else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N) + bVer = true; + else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0]) { - refMasks[0] = splitData[0].splitRefs | 
splitData[2].splitRefs; /* 25% left */ - refMasks[1] = allSplitRefs; /* 75% right */ - md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks); - checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth); + bHor = true; + bVer = true; } - if (!try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N) + if (bHor) { - refMasks[0] = allSplitRefs; /* 75% left */ - refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */ - md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); - checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks); - checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth); + int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU; + if (try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD) + { + refMasks[0] = allSplitRefs; /* 75% top */ + refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */ + md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks); + checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth); + } + + if (splitCost < md.bestMode->rdCost + threshold_2NxnU) + { + refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */ + refMasks[1] = allSplitRefs; /* 75% bot */ + md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks); + checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth); + } + + if (!try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD) + { + refMasks[0] = allSplitRefs; /* 75% top */ + refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */ + md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks); + checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth); + } + } + + if (bVer) + { + int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N; + if 
(try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N) + { + refMasks[0] = allSplitRefs; /* 75% left */ + refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */ + md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks); + checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth); + } + + if (splitCost < md.bestMode->rdCost + threshold_nLx2N) + { + refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left */ + refMasks[1] = allSplitRefs; /* 75% right */ + md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks); + checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth); + } + + if (!try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N) + { + refMasks[0] = allSplitRefs; /* 75% left */ + refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */ + md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks); + checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth); + } } } } diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/analysis.h --- a/source/encoder/analysis.h Mon Mar 21 13:50:14 2016 +0530 +++ b/source/encoder/analysis.h Thu Mar 24 16:36:55 2016 +0530 @@ -122,6 +122,8 @@ int32_t* m_reuseRef; uint8_t* m_reuseDepth; uint8_t* m_reuseModes; + uint8_t* m_reusePartSize; + uint8_t* m_reuseMergeFlag; uint32_t m_splitRefIdx[4]; uint64_t* cacheCost; diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Mon Mar 21 13:50:14 2016 +0530 +++ b/source/encoder/encoder.cpp Thu Mar 24 16:36:55 2016 +0530 @@ -1918,6 +1918,8 @@ CHECKED_MALLOC_ZERO(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir); CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame); CHECKED_MALLOC(interData->modes, 
uint8_t, analysis->numPartitions * analysis->numCUsInFrame); + CHECKED_MALLOC(interData->partSize, uint8_t, analysis->numPartitions * analysis->numCUsInFrame); + CHECKED_MALLOC(interData->mergeFlag, uint8_t, analysis->numPartitions * analysis->numCUsInFrame); CHECKED_MALLOC_ZERO(interData->wt, WeightParam, 3 * numDir); analysis->interData = interData; } @@ -1943,6 +1945,8 @@ X265_FREE(((analysis_inter_data*)analysis->interData)->ref); X265_FREE(((analysis_inter_data*)analysis->interData)->depth); X265_FREE(((analysis_inter_data*)analysis->interData)->modes); + X265_FREE(((analysis_inter_data*)analysis->interData)->mergeFlag); + X265_FREE(((analysis_inter_data*)analysis->interData)->partSize); X265_FREE(((analysis_inter_data*)analysis->interData)->wt); X265_FREE(analysis->interData); } @@ -2029,13 +2033,15 @@ else { - uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL; + uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSize = NULL, *mergeFlag = NULL; - tempBuf = X265_MALLOC(uint8_t, depthBytes * 2); - X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes * 2, m_analysisFile); + tempBuf = X265_MALLOC(uint8_t, depthBytes * 4); + X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes * 4, m_analysisFile); depthBuf = tempBuf; - modeBuf = tempBuf + depthBytes; + modeBuf = tempBuf + depthBytes; + partSize = modeBuf + depthBytes; + mergeFlag = partSize + depthBytes; size_t count = 0; for (uint32_t d = 0; d < depthBytes; d++) @@ -2043,13 +2049,15 @@ int bytes = analysis->numPartitions >> (depthBuf[d] * 2); memset(&((analysis_inter_data *)analysis->interData)->depth[count], depthBuf[d], bytes); memset(&((analysis_inter_data *)analysis->interData)->modes[count], modeBuf[d], bytes); + memset(&((analysis_inter_data *)analysis->interData)->partSize[count], partSize[d], bytes); + memset(&((analysis_inter_data *)analysis->interData)->mergeFlag[count], mergeFlag[d], bytes); count += bytes; } - + X265_FREE(tempBuf); - + int numDir = analysis->sliceType == X265_TYPE_P ? 
1 : 2; - X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFile); + X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFile); uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 : 3; X265_FREAD(((analysis_inter_data *)analysis->interData)->wt, sizeof(WeightParam), numPlanes * numDir, m_analysisFile); consumedBytes += frameRecordSize; @@ -2105,6 +2113,8 @@ { uint8_t depth = 0; uint8_t predMode = 0; + uint8_t partSize = 0; + uint8_t mergeFlag = 0; CUData* ctu = curEncData.getPicCTU(cuAddr); analysis_inter_data* interDataCTU = (analysis_inter_data*)analysis->interData; @@ -2115,8 +2125,17 @@ interDataCTU->depth[depthBytes] = depth; predMode = ctu->m_predMode[absPartIdx]; + if (ctu->m_refIdx[1][absPartIdx] != -1) + predMode = 4; // used as indicator if the block is coded as bidir + interDataCTU->modes[depthBytes] = predMode; + partSize = ctu->m_partSize[absPartIdx]; + interDataCTU->partSize[depthBytes] = partSize; + + mergeFlag = ctu->m_mergeFlag[absPartIdx]; + interDataCTU->mergeFlag[depthBytes] = mergeFlag; + absPartIdx += ctu->m_numPartitions >> (depth * 2); } } @@ -2130,9 +2149,9 @@ else { int numDir = (analysis->sliceType == X265_TYPE_P) ? 
1 : 2; - analysis->frameRecordSize += depthBytes * 2; - analysis->frameRecordSize += sizeof(MV) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir; - analysis->frameRecordSize += sizeof(WeightParam) * 3 * numDir; + analysis->frameRecordSize += depthBytes * 4; + analysis->frameRecordSize += sizeof(int32_t)* analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir; + analysis->frameRecordSize += sizeof(WeightParam)* 3 * numDir; } X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile); X265_FWRITE(&depthBytes, sizeof(uint32_t), 1, m_analysisFile); @@ -2155,6 +2174,8 @@ int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2; X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), depthBytes, m_analysisFile); X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), depthBytes, m_analysisFile); + X265_FWRITE(((analysis_inter_data*)analysis->interData)->partSize, sizeof(uint8_t), depthBytes, m_analysisFile); + X265_FWRITE(((analysis_inter_data*)analysis->interData)->mergeFlag, sizeof(uint8_t), depthBytes, m_analysisFile); X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFile); uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 : 3; X265_FWRITE(((analysis_inter_data*)analysis->interData)->wt, sizeof(WeightParam), numPlanes * numDir, m_analysisFile); _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel