This significantly changes the outputs for P and B frames: both bitrate and SSIM are higher. Let's do full regression testing on this and compare the bitrate/SSIM for all combinations to be reasonably sure there are no bugs.
On Fri, Sep 12, 2014 at 7:47 PM, <as...@multicorewareinc.com> wrote: > # HG changeset patch > # User Ashok Kumar Mishra<as...@multicorewareinc.com> > # Date 1410341620 -19800 > # Wed Sep 10 15:03:40 2014 +0530 > # Node ID d8be3c38915d4a628b804522da8946a152041203 > # Parent cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f > Search: remove redundant encode coefficients in intra for performance > > diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp > --- a/source/encoder/analysis.cpp Thu Sep 11 17:25:40 2014 -0700 > +++ b/source/encoder/analysis.cpp Wed Sep 10 15:03:40 2014 +0530 > @@ -1840,6 +1840,7 @@ > void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, > TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv) > { > uint64_t puCost = 0; > + uint32_t puBits = 0; > uint32_t depth = cu->getDepth(0); > uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1; > > @@ -1851,7 +1852,7 @@ > uint32_t tuDepthRange[2]; > cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0); > > - uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, > predYuv, outResiYuv, false, puCost, tuDepthRange); > + uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, > predYuv, outResiYuv, false, puCost, puBits, tuDepthRange); > xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv); > > //=== update PU data ==== > diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp > --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700 > +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530 > @@ -111,47 +111,6 @@ > return false; > } > > -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, > uint32_t absPartIdx, uint32_t depthRange[2]) > -{ > - uint32_t fullDepth = cu->getDepth(0) + trDepth; > - uint32_t trMode = cu->getTransformIdx(absPartIdx); > - uint32_t subdiv = (trMode > trDepth ? 
1 : 0); > - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; > - > - if (cu->getPredictionMode(0) == MODE_INTRA && cu->getPartitionSize(0) > == SIZE_NxN && trDepth == 0) > - { > - X265_CHECK(subdiv, "subdivision not present\n"); > - } > - else if (log2TrSize > *(depthRange + 1)) > - { > - X265_CHECK(subdiv, "subdivision not present\n"); > - } > - else if (log2TrSize == cu->m_slice->m_sps->quadtreeTULog2MinSize) > - { > - X265_CHECK(!subdiv, "subdivision present\n"); > - } > - else if (log2TrSize == *depthRange) > - { > - X265_CHECK(!subdiv, "subdivision present\n"); > - } > - else > - { > - X265_CHECK(log2TrSize > *depthRange, "transform size too > small\n"); > - m_entropyCoder->codeTransformSubdivFlag(subdiv, 5 - log2TrSize); > - } > - > - if (subdiv) > - { > - uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth + > 1) << 1); > - for (uint32_t part = 0; part < 4; part++) > - xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * > qtPartNum, depthRange); > - > - return; > - } > - > - m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode); > -} > - > void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, > uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t > height) > { > uint32_t fullDepth = cu->getDepth(0) + trDepth; > @@ -183,32 +142,6 @@ > } > } > > -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx) > -{ > - const TextType ttype = TEXT_LUMA; > - > - if (!cu->getCbf(absPartIdx, ttype, trDepth)) > - return; > - > - uint32_t fullDepth = cu->getDepth(0) + trDepth; > - uint32_t trMode = cu->getTransformIdx(absPartIdx); > - > - if (trMode > trDepth) > - { > - uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth + > 1) << 1); > - for (uint32_t part = 0; part < 4; part++) > - xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * > qtPartNum); > - > - return; > - } > - > - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; > - uint32_t qtLayer = log2TrSize - 2; > - 
uint32_t coeffOffset = absPartIdx << LOG2_UNIT_SIZE * 2; > - coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset; > - m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, > ttype); > -} > - > void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx, TextType ttype) > { > if (!cu->getCbf(absPartIdx, ttype, trDepth)) > @@ -316,15 +249,6 @@ > } > } > > -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, > uint32_t absPartIdx, uint32_t depthRange[2]) > -{ > - m_entropyCoder->resetBits(); > - xEncIntraHeaderLuma(cu, trDepth, absPartIdx); > - xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange); > - xEncCoeffQTLuma(cu, trDepth, absPartIdx); > - return m_entropyCoder->getNumberOfWrittenBits(); > -} > - > uint32_t Search::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, > uint32_t absPartIdx, uint32_t absPartIdxStep) > { > int cuSize = 1 << cu->getLog2CUSize(absPartIdx); > @@ -340,7 +264,14 @@ > { > m_entropyCoder->resetBits(); > xEncIntraHeaderLuma(cu, trDepth, absPartIdx); > - xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange); > + > + //Transform subdiv flag > + if (log2TrSize != *depthRange) > + m_entropyCoder->codeTransformSubdivFlag(0, 5 - log2TrSize); > + > + //===== Cbfs ===== > + uint32_t trMode = cu->getTransformIdx(absPartIdx); > + m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode); > > if (cu->getCbf(absPartIdx, TEXT_LUMA, trDepth)) > m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, > TEXT_LUMA); > @@ -463,7 +394,7 @@ > > /* returns distortion. 
TODO reorder params */ > uint32_t Search::xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, > uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, > - ShortYuv* resiYuv, bool > bAllowRQTSplit, uint64_t& rdCost, uint32_t depthRange[2]) > + ShortYuv* resiYuv, bool > bAllowRQTSplit, uint64_t& rdCost, uint32_t& rdBits, uint32_t depthRange[2]) > { > uint32_t fullDepth = cu->getDepth(0) + trDepth; > uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; > @@ -490,8 +421,9 @@ > if (!bAllowRQTSplit && noSplitIntraMaxTuSize) > bCheckSplit = false; > > - uint64_t singleCost = MAX_INT64; > - uint32_t singleDistY = 0; > + uint64_t singleCost = MAX_INT64; > + uint32_t singleDistY = 0; > + uint32_t singleBits = 0; > uint32_t singlePsyEnergyY = 0; > uint32_t singleCbfY = 0; > int bestModeId = 0; > @@ -580,7 +512,7 @@ > break; > else > { > - uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, > absPartIdx, log2TrSize, coeff, depthRange); > + singleBits = xGetIntraBitsLuma(cu, trDepth, > absPartIdx, log2TrSize, coeff, depthRange); > if (m_rdCost.m_psyRd) > singleCostTmp = > m_rdCost.calcPsyRdCost(singleDistYTmp, singleBits, singlePsyEnergyYTmp); > else > @@ -634,7 +566,7 @@ > } > cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, > absPartIdx, fullDepth); > > - uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, > absPartIdx, log2TrSize, coeffY, depthRange); > + singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, > log2TrSize, coeffY, depthRange); > if (m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice) > singleBits *= 4; > > @@ -663,23 +595,30 @@ > uint32_t qPartsDiv = cu->m_pic->getNumPartInCU() >> > ((fullDepth + 1) << 1); > uint32_t absPartIdxSub = absPartIdx; > uint32_t splitCbfY = 0; > + uint32_t splitBits = 0; > > for (uint32_t part = 0; part < 4; part++, absPartIdxSub += > qPartsDiv) > { > cu->m_psyEnergy = 0; > - splitDistY += xRecurIntraCodingQT(cu, trDepth + 1, > absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost, > depthRange); > + 
splitDistY += xRecurIntraCodingQT(cu, trDepth + 1, > absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost, > splitBits, depthRange); > splitPsyEnergyY += cu->m_psyEnergy; > splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth + > 1); > } > + > + if (bCheckFull) > + { > + m_entropyCoder->resetBits(); > + > + //subdiv > + if (log2TrSize != *depthRange) > + m_entropyCoder->codeTransformSubdivFlag(1, 5 - > log2TrSize); > + > + splitBits += m_entropyCoder->getNumberOfWrittenBits(); > + } > > for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++) > cu->getCbf(TEXT_LUMA)[absPartIdx + offs] |= (splitCbfY << > trDepth); > > - // restore context states > - > m_entropyCoder->load(m_rdEntropyCoders[fullDepth][CI_QT_TRAFO_ROOT]); > - > - // determine rate and r-d cost > - uint32_t splitBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx, > depthRange); > if (m_rdCost.m_psyRd) > splitCost = m_rdCost.calcPsyRdCost(splitDistY, splitBits, > splitPsyEnergyY); > else > @@ -689,6 +628,7 @@ > { > outDist += splitDistY; > rdCost += splitCost; > + rdBits += splitBits; > cu->m_psyEnergy = splitPsyEnergyY; > return outDist; > } > @@ -717,6 +657,7 @@ > } > > rdCost += singleCost; > + rdBits += singleBits; > cu->m_psyEnergy = singlePsyEnergyY; > return outDist + singleDistY; > } > @@ -1416,6 +1357,7 @@ > uint32_t bestPUDistY = 0; > uint64_t bestPUCost = MAX_INT64; > uint32_t puDistY; > + uint32_t puBits; > uint64_t puCost; > for (int mode = 0; mode < numModesForFullRD; mode++) > { > @@ -1427,7 +1369,8 @@ > > // determine residual for partition > puCost = 0; > - puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, > fencYuv, predYuv, resiYuv, false, puCost, depthRange); > + puBits = 0; > + puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, > fencYuv, predYuv, resiYuv, false, puCost, puBits, depthRange); > > // check r-d cost > if (puCost < bestPUCost) > @@ -1446,7 +1389,8 @@ > > // determine residual for partition > puCost = 0; > - puDistY = 
xRecurIntraCodingQT(cu, initTrDepth, partOffset, > fencYuv, predYuv, resiYuv, true, puCost, depthRange); > + puBits = 0; > + puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, > fencYuv, predYuv, resiYuv, true, puCost, puBits, depthRange); > > overallDistY += (puCost >= bestPUCost) ? bestPUDistY : puDistY; > > diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.h > --- a/source/encoder/search.h Thu Sep 11 17:25:40 2014 -0700 > +++ b/source/encoder/search.h Wed Sep 10 15:03:40 2014 +0530 > @@ -129,14 +129,11 @@ > void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, > ShortYuv* resiYuv, uint32_t depth, bool bSpatial); > void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx, TComYuv* reconYuv); > > - void xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx, uint32_t depthRange[2]); > void xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height); > - > - void xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx); > void xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx, TextType ttype); > void xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx); > void xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx); > - uint32_t xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, > uint32_t absPartIdx, uint32_t depthRange[2]); > + > uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, > uint32_t absPartIdx, uint32_t absPartIdxStep); > uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t > absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t depthRange[2]); > uint32_t xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx, > uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff); > @@ -147,7 +144,7 @@ > uint64_t &rdCost, uint32_t &outBits, > uint32_t *zeroDist, uint32_t tuDepthRange[2]); > > uint32_t 
xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, > uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, > - ShortYuv* resiYuv, bool bAllowRQTSplit, > uint64_t& dRDCost, uint32_t depthRange[2]); > + ShortYuv* resiYuv, bool bAllowRQTSplit, > uint64_t& dRDCost, uint32_t& puBits, uint32_t depthRange[2]); > > uint32_t xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, > uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv); > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel