Thanks, Min. This is a solution, but the extra TComDataCU* will affect performance. I have sent another patch that simply re-encodes the CU when lossless is chosen as the best mode, so normal analysis is not affected. Could you review that one?
Deepthi On Tue, Aug 26, 2014 at 3:47 AM, Steve Borho <st...@borho.org> wrote: > # HG changeset patch > # User Min Chen <chenm...@163.com> > # Date 1409002891 18000 > # Mon Aug 25 16:41:31 2014 -0500 > # Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264 > # Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b > analysis: fix inter hash mistake with --cu-lossless > > diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp > --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014 > +0900 > +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 > -0500 > @@ -2293,7 +2293,7 @@ > * \returns void > */ > void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* > fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, > - ShortYuv* outBestResiYuv, > TComYuv* outReconYuv) > + ShortYuv* outBestResiYuv, > TComYuv* outReconYuv, TComDataCU* tmpCu) > { > X265_CHECK(!cu->isIntra(0), "intra CU not expected\n"); > > @@ -2321,6 +2321,7 @@ > } > > uint64_t bestCost = MAX_INT64; > + bool bestTransquantBypassFlag = bIsTQBypassEnable; > > for (uint32_t modeId = 0; modeId < numModes; modeId++) > { > @@ -2388,15 +2389,29 @@ > if (cu->getQtRootCbf(0)) > xSetResidualQTData(cu, 0, outBestResiYuv, depth, true); > > + bestTransquantBypassFlag = bIsLosslessMode; > bestBits = bits; > bestCost = cost; > bestCoeffBits = cu->m_coeffBits; > m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]); > } > + > + // Save lossless mode coeff > + if (bIsLosslessMode) > + { > + tmpCu->copyPartFrom(cu, 0, depth, false); > + } > } > > X265_CHECK(bestCost != MAX_INT64, "no best cost\n"); > > + if (bestTransquantBypassFlag && !m_param->bLossless) > + { > + assert(log2CUSize > 2); > + cu->setCUTransquantBypassSubParts(true, 0, depth); > + cu->copyPartFrom(tmpCu, 0, depth, false); > + } > + > if (cu->getQtRootCbf(0)) > outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize); > else > diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h > --- 
a/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 17:53:12 2014 > +0900 > +++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 16:41:31 2014 > -0500 > @@ -147,7 +147,7 @@ > > /// encode residual and compute rd-cost for inter mode > void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, > TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv, > - TComYuv* reconYuv); > + TComYuv* reconYuv, TComDataCU* tmpCu); > void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, > TComYuv* predYuv, TComYuv* reconYuv); > > void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx, TComYuv* fencYuv, > diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp > --- a/source/encoder/analysis.cpp Mon Aug 25 17:53:12 2014 +0900 > +++ b/source/encoder/analysis.cpp Mon Aug 25 16:41:31 2014 -0500 > @@ -82,7 +82,7 @@ > uint32_t sizeL = cuSize * cuSize; > uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + > CHROMA_V_SHIFT(csp)); > > - ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, > tqBypass); > + ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9, > tqBypass); > > m_interCU_2Nx2N[i] = new TComDataCU; > m_interCU_2Nx2N[i]->create(&m_memPool[i], numPartitions, cuSize, > csp, 0, tqBypass); > @@ -108,6 +108,9 @@ > m_tempCU[i] = new TComDataCU; > m_tempCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 7, > tqBypass); > > + m_tempLosslessCU[i] = new TComDataCU; > + m_tempLosslessCU[i]->create(&m_memPool[i], numPartitions, cuSize, > csp, 8, tqBypass); > + > m_bestPredYuv[i] = new TComYuv; > ok &= m_bestPredYuv[i]->create(cuSize, cuSize, csp); > > @@ -158,6 +161,7 @@ > delete m_bestMergeCU[i]; > delete m_bestCU[i]; > delete m_tempCU[i]; > + delete m_tempLosslessCU[i]; > > if (m_bestPredYuv && m_bestPredYuv[i]) > { > @@ -240,6 +244,7 @@ > // initialize CU data > m_bestCU[0]->initCU(cu->m_pic, cu->getAddr()); > m_tempCU[0]->initCU(cu->m_pic, cu->getAddr()); > + m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr()); > > 
// analysis of CU > uint32_t numPartition = cu->getTotalNumPart(); > @@ -394,6 +399,7 @@ > uint32_t nextDepth = depth + 1; > TComDataCU* subBestPartCU = m_bestCU[nextDepth]; > TComDataCU* subTempPartCU = m_tempCU[nextDepth]; > + TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth]; > for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++) > { > int qp = outTempCU->getQP(0); > @@ -404,6 +410,7 @@ > (subBestPartCU->getCUPelY() < > slice->m_sps->picHeightInLumaSamples))) > { > subTempPartCU->initSubCU(outTempCU, partUnitIdx, > nextDepth, qp); // clear sub partition datas or init. > + subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx, > nextDepth, qp); // clear sub partition datas or init. > if (0 == partUnitIdx) //initialize RD with previous depth > buffer > { > > > m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]); > @@ -663,7 +670,7 @@ > } > > encodeResAndCalcRdInterCU(outBestCU, > m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], > - m_bestResiYuv[depth], > m_bestRecoYuv[depth]); > + m_bestResiYuv[depth], > m_bestRecoYuv[depth], m_tempLosslessCU[depth]); > uint64_t bestMergeCost = m_rdCost.m_psyRd ? > m_bestMergeCU[depth]->m_totalPsyCost : m_bestMergeCU[depth]->m_totalRDCost; > uint64_t bestCost = m_rdCost.m_psyRd ? 
> outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost; > if (bestMergeCost < bestCost) > @@ -733,7 +740,7 @@ > } > > encodeResAndCalcRdInterCU(outBestCU, > m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], > - m_bestResiYuv[depth], > m_bestRecoYuv[depth]); > + m_bestResiYuv[depth], > m_bestRecoYuv[depth], m_tempLosslessCU[depth]); > > > m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]); > } > else if (outBestCU->getPredictionMode(0) == > MODE_INTRA) > @@ -880,10 +887,12 @@ > outTempCU->setQPSubParts(qp, 0, depth); > uint32_t nextDepth = depth + 1; > TComDataCU* subTempPartCU = m_tempCU[nextDepth]; > + TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth]; > for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++) > { > TComDataCU* subBestPartCU = NULL; > subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, > qp); // clear sub partition datas or init. > + subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx, > nextDepth, qp); > > if (bInsidePicture || > ((subTempPartCU->getCUPelX() < > slice->m_sps->picWidthInLumaSamples) && > @@ -1258,10 +1267,12 @@ > uint32_t nextDepth = depth + 1; > TComDataCU* subBestPartCU = m_bestCU[nextDepth]; > TComDataCU* subTempPartCU = m_tempCU[nextDepth]; > + TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth]; > for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++) > { > int qp = outTempCU->getQP(0); > subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, > qp); // clear sub partition datas or init. 
> + subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx, > nextDepth, qp); > > if (bInsidePicture || > ((subBestPartCU->getCUPelX() < > slice->m_sps->picWidthInLumaSamples) && > @@ -1433,7 +1444,7 @@ > } > > //Encode with residue > - encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], > bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], > m_tmpRecoYuv[depth]); > + encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], > bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], > m_tmpRecoYuv[depth], m_tempLosslessCU[depth]); > > uint64_t tempCost = m_rdCost.m_psyRd ? > outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost; > uint64_t bestCost = m_rdCost.m_psyRd ? > outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost; > @@ -1506,7 +1517,8 @@ > m_tmpPredYuv[depth], > m_tmpResiYuv[depth], > m_bestResiYuv[depth], > - m_tmpRecoYuv[depth]); > + m_tmpRecoYuv[depth], > + > m_tempLosslessCU[depth]); > > > /* Todo: Fix the satd cost estimates. Why is merge > being chosen in high motion areas: estimated distortion is too low? 
*/ > @@ -1590,7 +1602,7 @@ > > if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true)) > { > - encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], > m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], > m_tmpRecoYuv[depth]); > + encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], > m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], > m_tmpRecoYuv[depth], m_tempLosslessCU[depth]); > checkDQP(outTempCU); > checkBestMode(outBestCU, outTempCU, depth); > } > diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.h > --- a/source/encoder/analysis.h Mon Aug 25 17:53:12 2014 +0900 > +++ b/source/encoder/analysis.h Mon Aug 25 16:41:31 2014 -0500 > @@ -82,6 +82,7 @@ > TComDataCU* m_bestMergeCU[NUM_CU_DEPTH]; > TComDataCU* m_bestCU[NUM_CU_DEPTH]; // Best CUs at each depth > TComDataCU* m_tempCU[NUM_CU_DEPTH]; // Temporary CUs at each depth > + TComDataCU* m_tempLosslessCU[NUM_CU_DEPTH]; // Temporary CUs for > lossless at each depth > > TComYuv** m_bestPredYuv; // Best Prediction Yuv for each > depth > ShortYuv** m_bestResiYuv; // Best Residual Yuv for each > depth > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel