Sorry, the output mismatch was due to the assembly (asm) code. Pushed. On Sun, Sep 14, 2014 at 4:35 PM, Deepthi Nandakumar <deep...@multicorewareinc.com> wrote:
> This significantly changes outputs for P and B frames. Higher bitrates and > higher SSIM. Lets do full regression testing on this - and compare the > bitrate/ssim for all combinations to be reasonably sure there are no bugs. > > On Fri, Sep 12, 2014 at 7:47 PM, <as...@multicorewareinc.com> wrote: > >> # HG changeset patch >> # User Ashok Kumar Mishra<as...@multicorewareinc.com> >> # Date 1410341620 -19800 >> # Wed Sep 10 15:03:40 2014 +0530 >> # Node ID d8be3c38915d4a628b804522da8946a152041203 >> # Parent cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f >> Search: remove redundant encode coefficients in intra for performance >> >> diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp >> --- a/source/encoder/analysis.cpp Thu Sep 11 17:25:40 2014 -0700 >> +++ b/source/encoder/analysis.cpp Wed Sep 10 15:03:40 2014 +0530 >> @@ -1840,6 +1840,7 @@ >> void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, >> TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv) >> { >> uint64_t puCost = 0; >> + uint32_t puBits = 0; >> uint32_t depth = cu->getDepth(0); >> uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 
0 : 1; >> >> @@ -1851,7 +1852,7 @@ >> uint32_t tuDepthRange[2]; >> cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0); >> >> - uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, >> predYuv, outResiYuv, false, puCost, tuDepthRange); >> + uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, >> predYuv, outResiYuv, false, puCost, puBits, tuDepthRange); >> xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv); >> >> //=== update PU data ==== >> diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp >> --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700 >> +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530 >> @@ -111,47 +111,6 @@ >> return false; >> } >> >> -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, uint32_t depthRange[2]) >> -{ >> - uint32_t fullDepth = cu->getDepth(0) + trDepth; >> - uint32_t trMode = cu->getTransformIdx(absPartIdx); >> - uint32_t subdiv = (trMode > trDepth ? 1 : 0); >> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; >> - >> - if (cu->getPredictionMode(0) == MODE_INTRA && >> cu->getPartitionSize(0) == SIZE_NxN && trDepth == 0) >> - { >> - X265_CHECK(subdiv, "subdivision not present\n"); >> - } >> - else if (log2TrSize > *(depthRange + 1)) >> - { >> - X265_CHECK(subdiv, "subdivision not present\n"); >> - } >> - else if (log2TrSize == cu->m_slice->m_sps->quadtreeTULog2MinSize) >> - { >> - X265_CHECK(!subdiv, "subdivision present\n"); >> - } >> - else if (log2TrSize == *depthRange) >> - { >> - X265_CHECK(!subdiv, "subdivision present\n"); >> - } >> - else >> - { >> - X265_CHECK(log2TrSize > *depthRange, "transform size too >> small\n"); >> - m_entropyCoder->codeTransformSubdivFlag(subdiv, 5 - log2TrSize); >> - } >> - >> - if (subdiv) >> - { >> - uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth >> + 1) << 1); >> - for (uint32_t part = 0; part < 4; part++) >> - xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * >> qtPartNum, 
depthRange); >> - >> - return; >> - } >> - >> - m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode); >> -} >> - >> void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t >> height) >> { >> uint32_t fullDepth = cu->getDepth(0) + trDepth; >> @@ -183,32 +142,6 @@ >> } >> } >> >> -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t >> absPartIdx) >> -{ >> - const TextType ttype = TEXT_LUMA; >> - >> - if (!cu->getCbf(absPartIdx, ttype, trDepth)) >> - return; >> - >> - uint32_t fullDepth = cu->getDepth(0) + trDepth; >> - uint32_t trMode = cu->getTransformIdx(absPartIdx); >> - >> - if (trMode > trDepth) >> - { >> - uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth >> + 1) << 1); >> - for (uint32_t part = 0; part < 4; part++) >> - xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * >> qtPartNum); >> - >> - return; >> - } >> - >> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; >> - uint32_t qtLayer = log2TrSize - 2; >> - uint32_t coeffOffset = absPartIdx << LOG2_UNIT_SIZE * 2; >> - coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset; >> - m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, >> ttype); >> -} >> - >> void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, TextType ttype) >> { >> if (!cu->getCbf(absPartIdx, ttype, trDepth)) >> @@ -316,15 +249,6 @@ >> } >> } >> >> -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, uint32_t depthRange[2]) >> -{ >> - m_entropyCoder->resetBits(); >> - xEncIntraHeaderLuma(cu, trDepth, absPartIdx); >> - xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange); >> - xEncCoeffQTLuma(cu, trDepth, absPartIdx); >> - return m_entropyCoder->getNumberOfWrittenBits(); >> -} >> - >> uint32_t Search::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, uint32_t absPartIdxStep) >> { >> 
int cuSize = 1 << cu->getLog2CUSize(absPartIdx); >> @@ -340,7 +264,14 @@ >> { >> m_entropyCoder->resetBits(); >> xEncIntraHeaderLuma(cu, trDepth, absPartIdx); >> - xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange); >> + >> + //Transform subdiv flag >> + if (log2TrSize != *depthRange) >> + m_entropyCoder->codeTransformSubdivFlag(0, 5 - log2TrSize); >> + >> + //===== Cbfs ===== >> + uint32_t trMode = cu->getTransformIdx(absPartIdx); >> + m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode); >> >> if (cu->getCbf(absPartIdx, TEXT_LUMA, trDepth)) >> m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, >> TEXT_LUMA); >> @@ -463,7 +394,7 @@ >> >> /* returns distortion. TODO reorder params */ >> uint32_t Search::xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, >> - ShortYuv* resiYuv, bool >> bAllowRQTSplit, uint64_t& rdCost, uint32_t depthRange[2]) >> + ShortYuv* resiYuv, bool >> bAllowRQTSplit, uint64_t& rdCost, uint32_t& rdBits, uint32_t depthRange[2]) >> { >> uint32_t fullDepth = cu->getDepth(0) + trDepth; >> uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; >> @@ -490,8 +421,9 @@ >> if (!bAllowRQTSplit && noSplitIntraMaxTuSize) >> bCheckSplit = false; >> >> - uint64_t singleCost = MAX_INT64; >> - uint32_t singleDistY = 0; >> + uint64_t singleCost = MAX_INT64; >> + uint32_t singleDistY = 0; >> + uint32_t singleBits = 0; >> uint32_t singlePsyEnergyY = 0; >> uint32_t singleCbfY = 0; >> int bestModeId = 0; >> @@ -580,7 +512,7 @@ >> break; >> else >> { >> - uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, >> absPartIdx, log2TrSize, coeff, depthRange); >> + singleBits = xGetIntraBitsLuma(cu, trDepth, >> absPartIdx, log2TrSize, coeff, depthRange); >> if (m_rdCost.m_psyRd) >> singleCostTmp = >> m_rdCost.calcPsyRdCost(singleDistYTmp, singleBits, singlePsyEnergyYTmp); >> else >> @@ -634,7 +566,7 @@ >> } >> cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, >> absPartIdx, fullDepth); 
>> >> - uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, >> absPartIdx, log2TrSize, coeffY, depthRange); >> + singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, >> log2TrSize, coeffY, depthRange); >> if (m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice) >> singleBits *= 4; >> >> @@ -663,23 +595,30 @@ >> uint32_t qPartsDiv = cu->m_pic->getNumPartInCU() >> >> ((fullDepth + 1) << 1); >> uint32_t absPartIdxSub = absPartIdx; >> uint32_t splitCbfY = 0; >> + uint32_t splitBits = 0; >> >> for (uint32_t part = 0; part < 4; part++, absPartIdxSub += >> qPartsDiv) >> { >> cu->m_psyEnergy = 0; >> - splitDistY += xRecurIntraCodingQT(cu, trDepth + 1, >> absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost, >> depthRange); >> + splitDistY += xRecurIntraCodingQT(cu, trDepth + 1, >> absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost, >> splitBits, depthRange); >> splitPsyEnergyY += cu->m_psyEnergy; >> splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth + >> 1); >> } >> + >> + if (bCheckFull) >> + { >> + m_entropyCoder->resetBits(); >> + >> + //subdiv >> + if (log2TrSize != *depthRange) >> + m_entropyCoder->codeTransformSubdivFlag(1, 5 - >> log2TrSize); >> + >> + splitBits += m_entropyCoder->getNumberOfWrittenBits(); >> + } >> >> for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++) >> cu->getCbf(TEXT_LUMA)[absPartIdx + offs] |= (splitCbfY << >> trDepth); >> >> - // restore context states >> - >> m_entropyCoder->load(m_rdEntropyCoders[fullDepth][CI_QT_TRAFO_ROOT]); >> - >> - // determine rate and r-d cost >> - uint32_t splitBits = xGetIntraBitsQTLuma(cu, trDepth, >> absPartIdx, depthRange); >> if (m_rdCost.m_psyRd) >> splitCost = m_rdCost.calcPsyRdCost(splitDistY, splitBits, >> splitPsyEnergyY); >> else >> @@ -689,6 +628,7 @@ >> { >> outDist += splitDistY; >> rdCost += splitCost; >> + rdBits += splitBits; >> cu->m_psyEnergy = splitPsyEnergyY; >> return outDist; >> } >> @@ -717,6 +657,7 @@ >> } >> >> rdCost += singleCost; >> + 
rdBits += singleBits; >> cu->m_psyEnergy = singlePsyEnergyY; >> return outDist + singleDistY; >> } >> @@ -1416,6 +1357,7 @@ >> uint32_t bestPUDistY = 0; >> uint64_t bestPUCost = MAX_INT64; >> uint32_t puDistY; >> + uint32_t puBits; >> uint64_t puCost; >> for (int mode = 0; mode < numModesForFullRD; mode++) >> { >> @@ -1427,7 +1369,8 @@ >> >> // determine residual for partition >> puCost = 0; >> - puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, >> fencYuv, predYuv, resiYuv, false, puCost, depthRange); >> + puBits = 0; >> + puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, >> fencYuv, predYuv, resiYuv, false, puCost, puBits, depthRange); >> >> // check r-d cost >> if (puCost < bestPUCost) >> @@ -1446,7 +1389,8 @@ >> >> // determine residual for partition >> puCost = 0; >> - puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, >> fencYuv, predYuv, resiYuv, true, puCost, depthRange); >> + puBits = 0; >> + puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, >> fencYuv, predYuv, resiYuv, true, puCost, puBits, depthRange); >> >> overallDistY += (puCost >= bestPUCost) ? 
bestPUDistY : puDistY; >> >> diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.h >> --- a/source/encoder/search.h Thu Sep 11 17:25:40 2014 -0700 >> +++ b/source/encoder/search.h Wed Sep 10 15:03:40 2014 +0530 >> @@ -129,14 +129,11 @@ >> void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, >> ShortYuv* resiYuv, uint32_t depth, bool bSpatial); >> void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t >> absPartIdx, TComYuv* reconYuv); >> >> - void xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t >> absPartIdx, uint32_t depthRange[2]); >> void xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t >> height); >> - >> - void xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t >> absPartIdx); >> void xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t >> absPartIdx, TextType ttype); >> void xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t >> absPartIdx); >> void xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx); >> - uint32_t xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, uint32_t depthRange[2]); >> + >> uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, uint32_t absPartIdxStep); >> uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t >> depthRange[2]); >> uint32_t xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx, >> uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff); >> @@ -147,7 +144,7 @@ >> uint64_t &rdCost, uint32_t &outBits, >> uint32_t *zeroDist, uint32_t tuDepthRange[2]); >> >> uint32_t xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, >> - ShortYuv* resiYuv, bool bAllowRQTSplit, >> uint64_t& dRDCost, uint32_t depthRange[2]); >> + ShortYuv* resiYuv, bool bAllowRQTSplit, >> uint64_t& 
dRDCost, uint32_t& puBits, uint32_t depthRange[2]); >> >> uint32_t xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, >> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv); >> >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel >> > >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel