Re: [x265] [PATCH RFC] analysis: add CU specific details to encodeCU()
On Fri, Sep 12, 2014 at 6:05 PM, santhosh...@multicorewareinc.com wrote: # HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1410525310 -19800 # Fri Sep 12 18:05:10 2014 +0530 # Node ID bf4ebe5df0cab013e4462597b55bd505b2a6a71a # Parent 7e29b10982d2eb7fd79f581d6f04184522ba analysis: add CU specific details to encodeCU() diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/analysis.cpp Fri Sep 12 18:05:10 2014 +0530 @@ -301,7 +301,6 @@ { if (cu-m_slice-m_pps-bUseDQP) m_bEncodeDQP = true; -loadCTUData(cu); // initialize CU data m_bestCU[0]-initCU(cu-m_pic, cu-getAddr()); diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/entropy.cpp --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.cppFri Sep 12 18:05:10 2014 +0530 @@ -481,14 +481,14 @@ } } -void Entropy::encodeCTU(TComDataCU* cu) +void Entropy::encodeCTU(TComDataCU* cu, CU* cuData) { bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP; -encodeCU(cu, 0, 0, false, bEncodeDQP); +encodeCU(cu, 0, 0, bEncodeDQP, cuData); } /* encode a CU block recursively */ -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP) +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP, CU* cuData) { Frame* pic = cu-m_pic; Slice* slice = cu-m_slice; @@ -496,30 +496,26 @@ if (depth = slice-m_pps-maxCuDQPDepth slice-m_pps-bUseDQP) bEncodeDQP = true; -if (!bInsidePicture) +int cu_split_flag = !(cuData-flags CU::LEAF); +int cu_unsplit_flag = !(cuData-flags CU::SPLIT_MANDATORY); + +uint32_t xmax = slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); +uint32_t ymax = slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); + +if (!cu_unsplit_flag) { -uint32_t xmax = slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); -uint32_t ymax = slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); 
-uint32_t cuSize = g_maxCUSize depth; - -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax - g_zscanToPelY[absPartIdx] + cuSize = ymax); - -if (!bInsidePicture) +uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; +for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) { -uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; -for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -{ -if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); -} - -return; +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) Most of the patch looks correct, but can't the above if-check also be replaced by one of the childCU flags? +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); } +return; } // We need to split, so don't try these modes. -if (bInsidePicture depth g_maxCUDepth) +if (cu_split_flag) codeSplitFlag(cu, absPartIdx, depth); if (depth cu-getDepth(absPartIdx) depth g_maxCUDepth) @@ -527,7 +523,10 @@ uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); +{ +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); +} return; } diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/entropy.h --- a/source/encoder/entropy.h Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.h Fri Sep 12 18:05:10 2014 +0530 @@ -148,7 +148,7 @@ void codeShortTermRefPicSet(RPS* rps); void finishSlice() { encodeBinTrm(1); finish(); dynamic_castBitstream*(m_bitIf)-writeByteAlignment(); } -void encodeCTU(TComDataCU* cu); +void encodeCTU(TComDataCU* cu, CU *cuData); void codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx); void 
codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp); void codeSaoMerge(uint32_t code) { encodeBin(code, m_contextState[OFF_SAO_MERGE_FLAG_CTX]); } @@ -193,7 +193,7 @@ void
Re: [x265] [PATCH] Search: remove redundant encode coefficients in intra for performance
This significantly changes outputs for P and B frames: higher bitrates and higher SSIM. Let's do full regression testing on this and compare the bitrate/SSIM for all combinations to be reasonably sure there are no bugs. On Fri, Sep 12, 2014 at 7:47 PM, as...@multicorewareinc.com wrote: # HG changeset patch # User Ashok Kumar Mishraas...@multicorewareinc.com # Date 1410341620 -19800 # Wed Sep 10 15:03:40 2014 +0530 # Node ID d8be3c38915d4a628b804522da8946a152041203 # Parent cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f Search: remove redundant encode coefficients in intra for performance diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/analysis.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -1840,6 +1840,7 @@ void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv) { uint64_t puCost = 0; +uint32_t puBits = 0; uint32_t depth = cu-getDepth(0); uint32_t initTrDepth = cu-getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1; @@ -1851,7 +1852,7 @@ uint32_t tuDepthRange[2]; cu-getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0); -uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, tuDepthRange); +uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, puBits, tuDepthRange); xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv); //=== update PU data diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -111,47 +111,6 @@ return false; } -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -uint32_t fullDepth = cu-getDepth(0) + trDepth; -uint32_t trMode = cu-getTransformIdx(absPartIdx); -uint32_t subdiv = (trMode trDepth ? 
1 : 0); -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; - -if (cu-getPredictionMode(0) == MODE_INTRA cu-getPartitionSize(0) == SIZE_NxN trDepth == 0) -{ -X265_CHECK(subdiv, subdivision not present\n); -} -else if (log2TrSize *(depthRange + 1)) -{ -X265_CHECK(subdiv, subdivision not present\n); -} -else if (log2TrSize == cu-m_slice-m_sps-quadtreeTULog2MinSize) -{ -X265_CHECK(!subdiv, subdivision present\n); -} -else if (log2TrSize == *depthRange) -{ -X265_CHECK(!subdiv, subdivision present\n); -} -else -{ -X265_CHECK(log2TrSize *depthRange, transform size too small\n); -m_entropyCoder-codeTransformSubdivFlag(subdiv, 5 - log2TrSize); -} - -if (subdiv) -{ -uint32_t qtPartNum = cu-m_pic-getNumPartInCU() ((fullDepth + 1) 1); -for (uint32_t part = 0; part 4; part++) -xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum, depthRange); - -return; -} - -m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode); -} - void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height) { uint32_t fullDepth = cu-getDepth(0) + trDepth; @@ -183,32 +142,6 @@ } } -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx) -{ -const TextType ttype = TEXT_LUMA; - -if (!cu-getCbf(absPartIdx, ttype, trDepth)) -return; - -uint32_t fullDepth = cu-getDepth(0) + trDepth; -uint32_t trMode= cu-getTransformIdx(absPartIdx); - -if (trMode trDepth) -{ -uint32_t qtPartNum = cu-m_pic-getNumPartInCU() ((fullDepth + 1) 1); -for (uint32_t part = 0; part 4; part++) -xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum); - -return; -} - -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; -uint32_t qtLayer= log2TrSize - 2; -uint32_t coeffOffset = absPartIdx LOG2_UNIT_SIZE * 2; -coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset; -m_entropyCoder-codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype); -} - void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t 
trDepth, uint32_t absPartIdx, TextType ttype) { if (!cu-getCbf(absPartIdx, ttype, trDepth)) @@ -316,15 +249,6 @@ } } -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -m_entropyCoder-resetBits(); -xEncIntraHeaderLuma(cu, trDepth, absPartIdx); -xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange); -xEncCoeffQTLuma(cu, trDepth, absPartIdx); -return