Re: [x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless
Thanks, Min. This is a solution, but the extra TComDataCU* will affect performance. I have sent another patch in which I just re-encode the CU if lossless is chosen as the best mode. That will not affect normal analysis. Can you review that? Deepthi On Tue, Aug 26, 2014 at 3:47 AM, Steve Borho st...@borho.org wrote: # HG changeset patch # User Min Chen chenm...@163.com # Date 1409002891 18000 # Mon Aug 25 16:41:31 2014 -0500 # Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264 # Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b analysis: fix inter hash mistake with --cu-lossless diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 -0500 @@ -2293,7 +2293,7 @@ * \returns void */ void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, - ShortYuv* outBestResiYuv, TComYuv* outReconYuv) + ShortYuv* outBestResiYuv, TComYuv* outReconYuv, TComDataCU* tmpCu) { X265_CHECK(!cu->isIntra(0), "intra CU not expected\n"); @@ -2321,6 +2321,7 @@ } uint64_t bestCost = MAX_INT64; +bool bestTransquantBypassFlag = bIsTQBypassEnable; for (uint32_t modeId = 0; modeId < numModes; modeId++) { @@ -2388,15 +2389,29 @@ if (cu->getQtRootCbf(0)) xSetResidualQTData(cu, 0, outBestResiYuv, depth, true); +bestTransquantBypassFlag = bIsLosslessMode; bestBits = bits; bestCost = cost; bestCoeffBits = cu->m_coeffBits; m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]); } + +// Save lossless mode coeff +if (bIsLosslessMode) +{ +tmpCu->copyPartFrom(cu, 0, depth, false); +} } X265_CHECK(bestCost != MAX_INT64, "no best cost\n"); +if (bestTransquantBypassFlag && !m_param->bLossless) +{ +assert(log2CUSize > 2); +cu->setCUTransquantBypassSubParts(true, 0, depth); +cu->copyPartFrom(tmpCu, 0, depth, false); +} + if (cu->getQtRootCbf(0)) outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize); else
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h --- a/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 17:53:12 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 16:41:31 2014 -0500 @@ -147,7 +147,7 @@ /// encode residual and compute rd-cost for inter mode void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv, - TComYuv* reconYuv); + TComYuv* reconYuv, TComDataCU* tmpCu); void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* reconYuv); void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/encoder/analysis.cpp Mon Aug 25 16:41:31 2014 -0500 @@ -82,7 +82,7 @@ uint32_t sizeL = cuSize * cuSize; uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp)); -ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, tqBypass); +ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9, tqBypass); m_interCU_2Nx2N[i] = new TComDataCU; m_interCU_2Nx2N[i]->create(m_memPool[i], numPartitions, cuSize, csp, 0, tqBypass); @@ -108,6 +108,9 @@ m_tempCU[i] = new TComDataCU; m_tempCU[i]->create(m_memPool[i], numPartitions, cuSize, csp, 7, tqBypass); +m_tempLosslessCU[i] = new TComDataCU; +m_tempLosslessCU[i]->create(m_memPool[i], numPartitions, cuSize, csp, 8, tqBypass); + m_bestPredYuv[i] = new TComYuv; ok = m_bestPredYuv[i]->create(cuSize, cuSize, csp); @@ -158,6 +161,7 @@ delete m_bestMergeCU[i]; delete m_bestCU[i]; delete m_tempCU[i]; +delete m_tempLosslessCU[i]; if (m_bestPredYuv && m_bestPredYuv[i]) { @@ -240,6 +244,7 @@ // initialize CU data m_bestCU[0]->initCU(cu->m_pic, cu->getAddr()); m_tempCU[0]->initCU(cu->m_pic, cu->getAddr()); +m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr()); // analysis of CU uint32_t numPartition = cu->getTotalNumPart(); @@
-394,6 +399,7 @@ uint32_t nextDepth = depth + 1; TComDataCU* subBestPartCU = m_bestCU[nextDepth]; TComDataCU* subTempPartCU = m_tempCU[nextDepth]; +TComDataCU*
[x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless
# HG changeset patch # User Min Chen chenm...@163.com # Date 1409002891 18000 # Mon Aug 25 16:41:31 2014 -0500 # Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264 # Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b analysis: fix inter hash mistake with --cu-lossless diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 -0500 @@ -2293,7 +2293,7 @@ * \returns void */ void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, - ShortYuv* outBestResiYuv, TComYuv* outReconYuv) + ShortYuv* outBestResiYuv, TComYuv* outReconYuv, TComDataCU* tmpCu) { X265_CHECK(!cu->isIntra(0), "intra CU not expected\n"); @@ -2321,6 +2321,7 @@ } uint64_t bestCost = MAX_INT64; +bool bestTransquantBypassFlag = bIsTQBypassEnable; for (uint32_t modeId = 0; modeId < numModes; modeId++) { @@ -2388,15 +2389,29 @@ if (cu->getQtRootCbf(0)) xSetResidualQTData(cu, 0, outBestResiYuv, depth, true); +bestTransquantBypassFlag = bIsLosslessMode; bestBits = bits; bestCost = cost; bestCoeffBits = cu->m_coeffBits; m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]); } + +// Save lossless mode coeff +if (bIsLosslessMode) +{ +tmpCu->copyPartFrom(cu, 0, depth, false); +} } X265_CHECK(bestCost != MAX_INT64, "no best cost\n"); +if (bestTransquantBypassFlag && !m_param->bLossless) +{ +assert(log2CUSize > 2); +cu->setCUTransquantBypassSubParts(true, 0, depth); +cu->copyPartFrom(tmpCu, 0, depth, false); +} + if (cu->getQtRootCbf(0)) outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize); else diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h --- a/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 17:53:12 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 16:41:31 2014 -0500 @@ -147,7 +147,7 @@ /// encode residual and compute rd-cost for inter mode void
encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv, - TComYuv* reconYuv); + TComYuv* reconYuv, TComDataCU* tmpCu); void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* reconYuv); void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/encoder/analysis.cpp Mon Aug 25 16:41:31 2014 -0500 @@ -82,7 +82,7 @@ uint32_t sizeL = cuSize * cuSize; uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp)); -ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, tqBypass); +ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9, tqBypass); m_interCU_2Nx2N[i] = new TComDataCU; m_interCU_2Nx2N[i]->create(m_memPool[i], numPartitions, cuSize, csp, 0, tqBypass); @@ -108,6 +108,9 @@ m_tempCU[i] = new TComDataCU; m_tempCU[i]->create(m_memPool[i], numPartitions, cuSize, csp, 7, tqBypass); +m_tempLosslessCU[i] = new TComDataCU; +m_tempLosslessCU[i]->create(m_memPool[i], numPartitions, cuSize, csp, 8, tqBypass); + m_bestPredYuv[i] = new TComYuv; ok = m_bestPredYuv[i]->create(cuSize, cuSize, csp); @@ -158,6 +161,7 @@ delete m_bestMergeCU[i]; delete m_bestCU[i]; delete m_tempCU[i]; +delete m_tempLosslessCU[i]; if (m_bestPredYuv && m_bestPredYuv[i]) { @@ -240,6 +244,7 @@ // initialize CU data m_bestCU[0]->initCU(cu->m_pic, cu->getAddr()); m_tempCU[0]->initCU(cu->m_pic, cu->getAddr()); +m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr()); // analysis of CU uint32_t numPartition = cu->getTotalNumPart(); @@ -394,6 +399,7 @@ uint32_t nextDepth = depth + 1; TComDataCU* subBestPartCU = m_bestCU[nextDepth]; TComDataCU* subTempPartCU = m_tempCU[nextDepth]; +TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth]; for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++) { int qp =
outTempCU->getQP(0); @@ -404,6 +410,7 @@ (subBestPartCU->getCUPelY() < slice->m_sps->picHeightInLumaSamples))) { subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init. +