Re: [x265] [PATCH] Search: remove redundant encode coefficients in intra for performance
Sorry, the output mismatch was due to asm. Pushed. On Sun, Sep 14, 2014 at 4:35 PM, Deepthi Nandakumar <deep...@multicorewareinc.com> wrote: This significantly changes outputs for P and B frames. Higher bitrates and higher SSIM. Let's do full regression testing on this - and compare the bitrate/SSIM for all combinations to be reasonably sure there are no bugs. On Fri, Sep 12, 2014 at 7:47 PM, <as...@multicorewareinc.com> wrote: # HG changeset patch # User Ashok Kumar Mishra <as...@multicorewareinc.com> # Date 1410341620 -19800 # Wed Sep 10 15:03:40 2014 +0530 # Node ID d8be3c38915d4a628b804522da8946a152041203 # Parent cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f Search: remove redundant encode coefficients in intra for performance diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/analysis.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -1840,6 +1840,7 @@ void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv) { uint64_t puCost = 0; +uint32_t puBits = 0; uint32_t depth = cu->getDepth(0); uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ?
0 : 1; @@ -1851,7 +1852,7 @@ uint32_t tuDepthRange[2]; cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0); -uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, tuDepthRange); +uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, puBits, tuDepthRange); xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv); //=== update PU data diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -111,47 +111,6 @@ return false; } -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -uint32_t fullDepth = cu->getDepth(0) + trDepth; -uint32_t trMode = cu->getTransformIdx(absPartIdx); -uint32_t subdiv = (trMode > trDepth ? 1 : 0); -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; - -if (cu->getPredictionMode(0) == MODE_INTRA && cu->getPartitionSize(0) == SIZE_NxN && trDepth == 0) -{ -X265_CHECK(subdiv, "subdivision not present\n"); -} -else if (log2TrSize > *(depthRange + 1)) -{ -X265_CHECK(subdiv, "subdivision not present\n"); -} -else if (log2TrSize == cu->m_slice->m_sps->quadtreeTULog2MinSize) -{ -X265_CHECK(!subdiv, "subdivision present\n"); -} -else if (log2TrSize == *depthRange) -{ -X265_CHECK(!subdiv, "subdivision present\n"); -} -else -{ -X265_CHECK(log2TrSize > *depthRange, "transform size too small\n"); -m_entropyCoder->codeTransformSubdivFlag(subdiv, 5 - log2TrSize); -} - -if (subdiv) -{ -uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth + 1) << 1); -for (uint32_t part = 0; part < 4; part++) -xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum, depthRange); - -return; -} - -m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode); -} - void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height) {
uint32_t fullDepth = cu->getDepth(0) + trDepth; @@ -183,32 +142,6 @@ } } -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx) -{ -const TextType ttype = TEXT_LUMA; - -if (!cu->getCbf(absPartIdx, ttype, trDepth)) -return; - -uint32_t fullDepth = cu->getDepth(0) + trDepth; -uint32_t trMode = cu->getTransformIdx(absPartIdx); - -if (trMode > trDepth) -{ -uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth + 1) << 1); -for (uint32_t part = 0; part < 4; part++) -xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum); - -return; -} - -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; -uint32_t qtLayer = log2TrSize - 2; -uint32_t coeffOffset = absPartIdx << LOG2_UNIT_SIZE * 2; -coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset; -m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype); -} - void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype) { if (!cu->getCbf(absPartIdx, ttype, trDepth)) @@ -316,15 +249,6 @@ } } -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -m_entropyCoder->resetBits(); -xEncIntraHeaderLuma(cu, trDepth, absPartIdx); -
Re: [x265] [PATCH] Search: remove redundant encode coefficients in intra for performance
This significantly changes outputs for P and B frames. Higher bitrates and higher SSIM. Let's do full regression testing on this - and compare the bitrate/SSIM for all combinations to be reasonably sure there are no bugs. On Fri, Sep 12, 2014 at 7:47 PM, <as...@multicorewareinc.com> wrote: # HG changeset patch # User Ashok Kumar Mishra <as...@multicorewareinc.com> # Date 1410341620 -19800 # Wed Sep 10 15:03:40 2014 +0530 # Node ID d8be3c38915d4a628b804522da8946a152041203 # Parent cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f Search: remove redundant encode coefficients in intra for performance diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/analysis.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -1840,6 +1840,7 @@ void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv) { uint64_t puCost = 0; +uint32_t puBits = 0; uint32_t depth = cu->getDepth(0); uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1; @@ -1851,7 +1852,7 @@ uint32_t tuDepthRange[2]; cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0); -uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, tuDepthRange); +uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, puBits, tuDepthRange); xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv); //=== update PU data diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -111,47 +111,6 @@ return false; } -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -uint32_t fullDepth = cu->getDepth(0) + trDepth; -uint32_t trMode = cu->getTransformIdx(absPartIdx); -uint32_t subdiv = (trMode > trDepth ?
1 : 0); -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; - -if (cu->getPredictionMode(0) == MODE_INTRA && cu->getPartitionSize(0) == SIZE_NxN && trDepth == 0) -{ -X265_CHECK(subdiv, "subdivision not present\n"); -} -else if (log2TrSize > *(depthRange + 1)) -{ -X265_CHECK(subdiv, "subdivision not present\n"); -} -else if (log2TrSize == cu->m_slice->m_sps->quadtreeTULog2MinSize) -{ -X265_CHECK(!subdiv, "subdivision present\n"); -} -else if (log2TrSize == *depthRange) -{ -X265_CHECK(!subdiv, "subdivision present\n"); -} -else -{ -X265_CHECK(log2TrSize > *depthRange, "transform size too small\n"); -m_entropyCoder->codeTransformSubdivFlag(subdiv, 5 - log2TrSize); -} - -if (subdiv) -{ -uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth + 1) << 1); -for (uint32_t part = 0; part < 4; part++) -xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum, depthRange); - -return; -} - -m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode); -} - void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height) { uint32_t fullDepth = cu->getDepth(0) + trDepth; @@ -183,32 +142,6 @@ } } -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx) -{ -const TextType ttype = TEXT_LUMA; - -if (!cu->getCbf(absPartIdx, ttype, trDepth)) -return; - -uint32_t fullDepth = cu->getDepth(0) + trDepth; -uint32_t trMode = cu->getTransformIdx(absPartIdx); - -if (trMode > trDepth) -{ -uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth + 1) << 1); -for (uint32_t part = 0; part < 4; part++) -xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum); - -return; -} - -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; -uint32_t qtLayer = log2TrSize - 2; -uint32_t coeffOffset = absPartIdx << LOG2_UNIT_SIZE * 2; -coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset; -m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype); -} - void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t
trDepth, uint32_t absPartIdx, TextType ttype) { if (!cu->getCbf(absPartIdx, ttype, trDepth)) @@ -316,15 +249,6 @@ } } -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -m_entropyCoder->resetBits(); -xEncIntraHeaderLuma(cu, trDepth, absPartIdx); -xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange); -xEncCoeffQTLuma(cu, trDepth, absPartIdx); -return