[x265] [PATCH] psy-rd: implement psy-rd in rdlevel=4,3 and 2
# HG changeset patch # User Sumalatha Polureddysumala...@multicorewareinc.com # Date 1403689018 -19800 # Node ID 6ef75d4d64dc98194a1c90a952ce14677bfbcb78 # Parent 613bfe5cd169c3accb4646891f904735ba21290d psy-rd: implement psy-rd in rdlevel=4,3 and 2 diff -r 613bfe5cd169 -r 6ef75d4d64dc source/encoder/compress.cpp --- a/source/encoder/compress.cpp Tue Jun 24 16:39:42 2014 +0530 +++ b/source/encoder/compress.cpp Wed Jun 25 15:06:58 2014 +0530 @@ -72,7 +72,17 @@ m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); cu-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); -cu-m_totalRDCost = m_rdCost-calcRdCost(cu-m_totalDistortion, cu-m_totalBits); +if (m_rdCost-psyRdEnabled()) +{ +int part = g_convertToBit[cu-getCUSize(0)]; +cu-m_psyEnergy = m_rdCost-psyCost(part, m_origYuv[depth]-getLumaAddr(), m_origYuv[depth]-getStride(), +m_tmpRecoYuv[depth]-getLumaAddr(), m_tmpRecoYuv[depth]-getStride()); +cu-m_totalPsyCost = m_rdCost-calcPsyRdCost(cu-m_totalDistortion, cu-m_totalBits, cu-m_psyEnergy); +} +else +{ +cu-m_totalRDCost = m_rdCost-calcRdCost(cu-m_totalDistortion, cu-m_totalBits); +} } void TEncCu::xComputeCostIntraInInter(TComDataCU* cu, PartSize partSize) @@ -321,7 +331,9 @@ //Encode with residue m_search-encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false, true); -if (outTempCU-m_totalRDCost outBestCU-m_totalRDCost) //Choose best from no-residue mode and residue mode +uint64_t tempCost = m_rdCost-psyRdEnabled() ? outTempCU-m_totalPsyCost : outTempCU-m_totalRDCost; +uint64_t bestCost = m_rdCost-psyRdEnabled() ? 
outBestCU-m_totalPsyCost : outBestCU-m_totalRDCost; +if (tempCost bestCost) //Choose best from no-residue mode and residue mode { TComDataCU* tmp = outTempCU; outTempCU = outBestCU; @@ -485,7 +497,9 @@ m_search-encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_bestRecoYuv[depth], false, true); -if (m_bestMergeCU[depth]-m_totalRDCost outBestCU-m_totalRDCost) +uint64_t bestMergeCost = m_rdCost-psyRdEnabled() ? m_bestMergeCU[depth]-m_totalPsyCost : m_bestMergeCU[depth]-m_totalRDCost; +uint64_t bestCost = m_rdCost-psyRdEnabled() ? outBestCU-m_totalPsyCost : outBestCU-m_totalRDCost; +if (bestMergeCost bestCost) { outBestCU = m_bestMergeCU[depth]; tempYuv = m_modePredYuv[3][depth]; @@ -512,12 +526,21 @@ if (bdoIntra) { xComputeCostIntraInInter(m_intraInInterCU[depth], SIZE_2Nx2N); +uint64_t intraInInterCost, bestCost; if (m_param-rdLevel 2) { xEncodeIntraInInter(m_intraInInterCU[depth], m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]); +intraInInterCost = m_rdCost-psyRdEnabled() ? m_intraInInterCU[depth]-m_totalPsyCost : m_intraInInterCU[depth]-m_totalRDCost; +bestCost = m_rdCost-psyRdEnabled() ? 
outBestCU-m_totalPsyCost : outBestCU-m_totalRDCost; } -if (m_intraInInterCU[depth]-m_totalRDCost outBestCU-m_totalRDCost) +else +{ +intraInInterCost = m_intraInInterCU[depth]-m_totalRDCost; +bestCost = outBestCU-m_totalRDCost; + +} +if (intraInInterCost bestCost) { outBestCU = m_intraInInterCU[depth]; tempYuv = m_modePredYuv[5][depth]; @@ -625,7 +648,15 @@ m_entropyCoder-resetBits(); m_entropyCoder-encodeSplitFlag(outBestCU, 0, depth); outBestCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits -outBestCU-m_totalRDCost = m_rdCost-calcRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits); +if (m_rdCost-psyRdEnabled()) +{ +outBestCU-m_totalPsyCost = m_rdCost-calcPsyRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits, +outBestCU-m_psyEnergy); +} +else +{ +outBestCU-m_totalRDCost = m_rdCost-calcRdCost(outBestCU-m_totalDistortion,
Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled
On 6/25/2014 1:22 AM, deep...@multicorewareinc.com wrote: +/* In 444, chroma gets twice as much resolution, so halve quality when psy-rd is enabled */ +if (p->internalCsp == X265_CSP_I444 && p->psyRd) +{ +p->cbQpOffset += 6; +p->crQpOffset += 6; +} I don't really understand what the reasoning is for this? Is it just to make it fit with the model psy-rd is currently using? - Derek ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled
This is primarily a visual quality improvement/psy-rd hack. In 444, since chroma resolution is on par with luma, and our eyes aren't very sensitive to chroma, we increase the chroma QP so that those bits can be used up in luma. On Wed, Jun 25, 2014 at 4:35 PM, Derek Buitenhuis derek.buitenh...@gmail.com wrote: On 6/25/2014 1:22 AM, deep...@multicorewareinc.com wrote: +/* In 444, chroma gets twice as much resolution, so halve quality when psy-rd is enabled */ +if (p->internalCsp == X265_CSP_I444 && p->psyRd) +{ +p->cbQpOffset += 6; +p->crQpOffset += 6; +} I don't really understand what the reasoning is for this? Is it just to make it fit with the model psy-rd is currently using? - Derek ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled
In a sense, psy-rd encapsulates all those r-d algorithms/tweaks/hacks that improve visual quality but may hurt objective metrics like psnr/ssim. In 444, this qp hack is likely to hurt objective metrics, hence it's turned on only if psychovisual improvement is desired. On Jun 25, 2014 7:02 PM, Derek Buitenhuis derek.buitenh...@gmail.com wrote: On 6/25/2014 12:10 PM, Deepthi Nandakumar wrote: This is primarily a visual quality improvement/psy-rd hack. In 444, since chroma resolution is on par with luma, and our eyes arent very sensitive to chroma, we increase the chroma QP so that those bits can be used up in luma. Yah I get the idea of a chroma qp offset, I'm just wondering why it is specific to psy-rd? Cheers, - Derek ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 1 of 3] rc: initialize states to hold frame statistics used in two pass
# HG changeset patch # User Aarthi Thirumalaiaar...@multicorewareinc.com # Date 1403716605 -19800 # Wed Jun 25 22:46:45 2014 +0530 # Node ID e71e34d02de228eab43edf1910a71a44417d # Parent 09450ac6dc7d0f495582bf327488612755df1719 rc: initalize states to hold frame statistics used in two pass frame stats includes mv bits,DC coeff bits and number of Intra, Inter and Skip Cus per frame. diff -r 09450ac6dc7d -r e71e34d02d77 source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Tue Jun 24 15:41:55 2014 +0900 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Wed Jun 25 22:46:45 2014 +0530 @@ -252,6 +252,8 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_mvBits = 0; +m_coeffBits= 0; m_numPartitions= pic-getNumPartInCU(); char* qp = pic-getCU(getAddr())-getQP(); m_baseQp = pic-getCU(getAddr())-m_baseQp; @@ -345,6 +347,8 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_mvBits = 0; +m_coeffBits= 0; uint8_t cuSize = g_maxCUSize depth; @@ -383,6 +387,8 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_mvBits = 0; +m_coeffBits= 0; uint8_t cuSize = g_maxCUSize depth; @@ -433,6 +439,9 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_mvBits = 0; +m_coeffBits= 0; + m_numPartitions= cu-getTotalNumPart() 2; for (int i = 0; i 4; i++) @@ -499,6 +508,8 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_mvBits = 0; +m_coeffBits= 0; m_numPartitions= cu-getTotalNumPart() 2; for (int i = 0; i 4; i++) @@ -564,6 +575,8 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_mvBits = 0; +m_coeffBits= 0; m_numPartitions= cu-getTotalNumPart() 2; TComDataCU* otherCU = m_pic-getCU(m_cuAddr); @@ -598,6 +611,8 @@ m_psyEnergy+= cu-m_psyEnergy; m_totalDistortion += cu-m_totalDistortion; m_totalBits+= cu-m_totalBits; +m_mvBits += cu-m_mvBits; +m_coeffBits+= cu-m_coeffBits; uint32_t offset = cu-getTotalNumPart() * partUnitIdx; uint32_t numPartition = cu-getTotalNumPart(); @@ 
-660,6 +675,8 @@ cu-m_totalRDCost = m_totalRDCost; cu-m_totalDistortion = m_totalDistortion; cu-m_totalBits = m_totalBits; +cu-m_mvBits = m_mvBits; +cu-m_coeffBits = m_coeffBits; int sizeInBool = sizeof(bool) * m_numPartitions; int sizeInChar = sizeof(char) * m_numPartitions; @@ -747,6 +764,8 @@ cu-m_totalRDCost = m_totalRDCost; cu-m_totalDistortion = m_totalDistortion; cu-m_totalBits = m_totalBits; +cu-m_mvBits = m_mvBits; +cu-m_coeffBits = m_coeffBits; int sizeInBool = sizeof(bool) * qNumPart; int sizeInChar = sizeof(char) * qNumPart; diff -r 09450ac6dc7d -r e71e34d02d77 source/Lib/TLibCommon/TComDataCU.h --- a/source/Lib/TLibCommon/TComDataCU.hTue Jun 24 15:41:55 2014 +0900 +++ b/source/Lib/TLibCommon/TComDataCU.hWed Jun 25 22:46:45 2014 +0530 @@ -202,6 +202,8 @@ uint32_t m_count[4]; uint64_t m_sa8dCost; doublem_baseQp; //Qp of Cu set from RateControl/Vbv. +uint32_t m_mvBits; // Mv bits + Ref + block type +uint32_t m_coeffBits;// Texture bits (DCT Coeffs) // --- // create / destroy / initialize / copy diff -r 09450ac6dc7d -r e71e34d02d77 source/common/frame.cpp --- a/source/common/frame.cpp Tue Jun 24 15:41:55 2014 +0900 +++ b/source/common/frame.cpp Wed Jun 25 22:46:45 2014 +0530 @@ -45,6 +45,7 @@ m_reconRowCount.set(0); m_countRefEncoders = 0; memset(m_lowres, 0, sizeof(m_lowres)); +memset(m_stats, 0, sizeof(m_stats)); m_next = NULL; m_prev = NULL; m_qpaAq = NULL; @@ -134,6 +135,7 @@ } if (param-rc.aqMode) memset(m_qpaAq, 0, numRows * sizeof(double)); +memset(m_stats, 0, sizeof(m_stats)); } void Frame::destroy() diff -r 09450ac6dc7d -r e71e34d02d77 source/common/frame.h --- a/source/common/frame.h Tue Jun 24 15:41:55 2014 +0900 +++ b/source/common/frame.h Wed Jun 25 22:46:45 2014 +0530 @@ -37,6 +37,20 @@ class Encoder; +/* Current frame stats for 2 pass */ +struct FrameStats +{ +/* MV bits (MV+Ref+Block Type) */ +int mvBits; +/* Texture bits (DCT coefs) */ +int coeffBits; +int miscBits; +/* CU type counts */ +int cuCount_i; +
[x265] [PATCH 3 of 3] rc: rename texBits to coeffBits in RateControlEntry structure to maintain uniformity
# HG changeset patch # User Aarthi Thirumalaiaar...@multicorewareinc.com # Date 1403716778 -19800 # Wed Jun 25 22:49:38 2014 +0530 # Node ID dd4ca794e746ecb04eb76dfdb32e9ef16f642e77 # Parent 0995efabd44470c1192994e1aceeb40ae606467f rc: rename texBits to coeffBits in RateControlEntry structure to maintain uniformity. diff -r 0995efabd444 -r dd4ca794e746 source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cppWed Jun 25 22:48:55 2014 +0530 +++ b/source/encoder/ratecontrol.cppWed Jun 25 22:49:38 2014 +0530 @@ -692,8 +692,8 @@ m_shortTermCplxCount *= 0.5; m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION); m_shortTermCplxCount++; -/* texBits to be used in 2-pass */ -rce-texBits = m_currentSatd; +/* coeffBits to be used in 2-pass */ +rce-coeffBits = m_currentSatd; rce-blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount; rce-mvBits = 0; rce-sliceType = m_sliceType; diff -r 0995efabd444 -r dd4ca794e746 source/encoder/ratecontrol.h --- a/source/encoder/ratecontrol.h Wed Jun 25 22:48:55 2014 +0530 +++ b/source/encoder/ratecontrol.h Wed Jun 25 22:49:38 2014 +0530 @@ -53,7 +53,7 @@ struct RateControlEntry { -int64_t texBits; /* Required in 2-pass rate control */ +int64_t coeffBits; /* Required in 2-pass rate control */ int64_t lastSatd; /* Contains the picture cost of the previous frame, required for resetAbr and VBV */ int sliceType; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 2 of 3] rc: accumulate mv bits, coeff bits per frame
# HG changeset patch # User Aarthi Thirumalaiaar...@multicorewareinc.com # Date 1403716735 -19800 # Wed Jun 25 22:48:55 2014 +0530 # Node ID 0995efabd44470c1192994e1aceeb40ae606467f # Parent e71e34d02de228eab43edf1910a71a44417d rc: accumulate mv bits, coeff bits per frame. diff -r e71e34d02d77 -r 0995efabd444 source/Lib/TLibEncoder/TEncCu.cpp --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Jun 25 22:46:45 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Wed Jun 25 22:48:55 2014 +0530 @@ -555,6 +555,7 @@ m_entropyCoder-resetBits(); m_entropyCoder-encodeSplitFlag(outBestCU, 0, depth); outBestCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits + if (m_rdCost-psyRdEnabled()) { outBestCU-m_totalPsyCost = m_rdCost-calcPsyRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits, @@ -616,7 +617,7 @@ m_entropyCoder-encodeSplitFlag(outTempCU, 0, depth); outTempCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits } - + if (m_rdCost-psyRdEnabled()) { outTempCU-m_totalPsyCost = m_rdCost-calcPsyRdCost(outTempCU-m_totalDistortion, outTempCU-m_totalBits, @@ -907,6 +908,7 @@ m_entropyCoder-resetBits(); m_entropyCoder-encodeSplitFlag(outBestCU, 0, depth); outBestCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits + if (m_rdCost-psyRdEnabled()) { outBestCU-m_totalPsyCost = m_rdCost-calcPsyRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits, @@ -1168,6 +1170,7 @@ return; } + if (slice-getPPS()-getTransquantBypassEnableFlag()) { m_entropyCoder-encodeCUTransquantBypassFlag(cu, absPartIdx); @@ -1390,12 +1393,14 @@ m_entropyCoder-encodePartSize(outTempCU, 0, depth); m_entropyCoder-encodePredInfo(outTempCU, 0); m_entropyCoder-encodeIPCMInfo(outTempCU, 0); +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); // Encode Coefficients bool bCodeDQP = getdQPFlag(); m_entropyCoder-encodeCoeff(outTempCU, 0, depth, outTempCU-getCUSize(0), bCodeDQP); m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); 
outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; if (m_rdCost-psyRdEnabled()) { @@ -1437,12 +1442,13 @@ m_entropyCoder-encodePartSize(outTempCU, 0, depth); m_entropyCoder-encodePredInfo(outTempCU, 0); m_entropyCoder-encodeIPCMInfo(outTempCU, 0); - +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); // Encode Coefficients bool bCodeDQP = getdQPFlag(); m_entropyCoder-encodeCoeff(outTempCU, 0, depth, outTempCU-getCUSize(0), bCodeDQP); m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; if (m_rdCost-psyRdEnabled()) { @@ -1492,12 +1498,14 @@ m_entropyCoder-encodeSkipFlag(outTempCU, 0); m_entropyCoder-encodePredMode(outTempCU, 0); m_entropyCoder-encodePartSize(outTempCU, 0, depth); +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); m_entropyCoder-encodeIPCMInfo(outTempCU, 0); m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); outTempCU-m_totalRDCost = m_rdCost-calcRdCost(outTempCU-m_totalDistortion, outTempCU-m_totalBits); +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; xCheckDQP(outTempCU); xCheckBestMode(outBestCU, outTempCU, depth); diff -r e71e34d02d77 -r 0995efabd444 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jun 25 22:46:45 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jun 25 22:48:55 2014 +0530 @@ -4211,6 +4211,7 @@ } m_entropyCoder-encodeSkipFlag(cu, 0); m_entropyCoder-encodeMergeIndex(cu, 0); +cu-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); return m_entropyCoder-getNumberOfWrittenBits(); } else @@ -4225,8 +4226,11 @@ m_entropyCoder-encodePartSize(cu, 0, cu-getDepth(0)); m_entropyCoder-encodePredInfo(cu, 0); bool bDummy = false; +cu-m_mvBits = 
m_entropyCoder-getNumberOfWrittenBits(); m_entropyCoder-encodeCoeff(cu, 0, cu-getDepth(0), cu-getCUSize(0), bDummy); -return m_entropyCoder-getNumberOfWrittenBits(); +int totalBits = m_entropyCoder-getNumberOfWrittenBits(); +cu-m_coeffBits = totalBits - cu-m_mvBits; +return totalBits; } } diff -r e71e34d02d77 -r 0995efabd444 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Wed Jun 25 22:46:45 2014 +0530 +++
[x265] [PATCH 0 of 3 ] collect split up of texture bits and mv bits at frame level
___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled
On 6/25/2014 3:45 PM, Deepthi Nandakumar wrote: In a sense, psy-rd encapsulates all those r-d algorithms/tweaks/hacks that improve visual quality but may hurt objective metrics like psnr/ssim. In 444, this qp hack is likely to hurt objective metrics, hence it's turned on only if psychovisual improvement is desired. OK. Was curious, since these are not lumped in with psy-rd in x264. - Derek ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled
On Wed, 25 Jun 2014 23:18:48 +0530, Deepthi Nandakumar wrote: They are. encoder.c:x264_validate_parameters if( b_open && i_csp >= X264_CSP_I444 && i_csp < X264_CSP_BGR && h->param.analyse.b_psy ) h->param.analyse.i_chroma_qp_offset += 6; On Wed, Jun 25, 2014 at 11:12 PM, Derek Buitenhuis derek.buitenh...@gmail.com wrote: On 6/25/2014 3:45 PM, Deepthi Nandakumar wrote: In a sense, psy-rd encapsulates all those r-d algorithms/tweaks/hacks that improve visual quality but may hurt objective metrics like psnr/ssim. In 444, this qp hack is likely to hurt objective metrics, hence it's turned on only if psychovisual improvement is desired. OK. Was curious, since these are not lumped in with psy-rd in x264. That is a separate --psy (--no-psy) option in x264 and not --psy-rd ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 2 of 2] pass TLD into class FrameFilter
On Tue, Jun 24, 2014 at 5:36 AM, Min Chen chenm...@163.com wrote: # HG changeset patch # User Min Chen chenm...@163.com # Date 1403568362 25200 # Node ID efa48bc0245bded1418db3c42b042acb9969146c # Parent 12c1d8aaa8f56a8f2de74c8ff1451d99d04c817d pass TLD into class FrameFilter diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/cturow.h --- a/source/encoder/cturow.h Mon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/cturow.h Mon Jun 23 17:06:02 2014 -0700 @@ -47,6 +47,10 @@ RDCost m_rdCost; TComTrQuant m_trQuant; +// NOTE: the maximum LCU 64x64 have 256 partitions +boolm_edgeFilter[256]; +uint8_t m_blockingStrength[256]; + void init(Encoder); ~ThreadLocalData(); }; diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/encoder.cppMon Jun 23 17:06:02 2014 -0700 @@ -42,6 +42,7 @@ #include x265.h using namespace x265; +ThreadLocalData* Encoder::m_threadLocalData; Encoder::Encoder() { @@ -194,9 +195,10 @@ if (m_frameEncoder) { int numRows = (m_param-sourceHeight + g_maxCUSize - 1) / g_maxCUSize; +int numCols = (m_param-sourceWidth + g_maxCUSize - 1) / g_maxCUSize; for (int i = 0; i m_param-frameNumThreads; i++) { -if (!m_frameEncoder[i].init(this, numRows)) +if (!m_frameEncoder[i].init(this, numRows, numCols)) { x265_log(m_param, X265_LOG_ERROR, Unable to initialize frame encoder, aborting\n); m_aborted = true; diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/encoder.h --- a/source/encoder/encoder.h Mon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/encoder.h Mon Jun 23 17:06:02 2014 -0700 @@ -175,7 +175,7 @@ x265_param*m_param; RateControl* m_rateControl; -ThreadLocalData* m_threadLocalData; +static ThreadLocalData* m_threadLocalData; bool m_bEnableRDOQ; diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/frameencoder.cpp Mon Jun 23 17:06:02 2014 -0700 @@ -80,15 
+80,17 @@ stop(); } -bool FrameEncoder::init(Encoder *top, int numRows) +bool FrameEncoder::init(Encoder *top, int numRows, int numCols) { bool ok = true; m_top = top; m_param = top-m_param; m_numRows = numRows; +m_numCols = numCols; m_filterRowDelay = (m_param-saoLcuBasedOptimization m_param-saoLcuBoundary) ? 2 : (m_param-bEnableSAO || m_param-bEnableLoopFilter ? 1 : 0); +m_filterRowDelayCus = m_filterRowDelay * numCols; m_rows = new CTURow[m_numRows]; for (int i = 0; i m_numRows; ++i) @@ -505,7 +507,7 @@ // Extend border after whole-frame SAO is finished for (int row = 0; row m_numRows; row++) { -m_frameFilter.processRowPost(row); +m_frameFilter.processRowPost(row, 0); } } @@ -845,7 +847,7 @@ } // setup thread-local data -ThreadLocalData tld = threadId = 0 ? m_top-m_threadLocalData[threadId] : m_tld; +ThreadLocalData tld = threadId = 0 ? Encoder::m_threadLocalData[threadId] : m_tld; tld.m_trQuant.m_nr = m_nr; tld.m_search.m_mref = m_mref; codeRow.setThreadLocalData(tld); @@ -856,7 +858,8 @@ tld.m_cuCoder.m_log = tld.m_cuCoder.m_sliceTypeLog[m_frame-getSlice()-getSliceType()]; int64_t startTime = x265_mdate(); -const uint32_t numCols = m_frame-getPicSym()-getFrameWidthInCU(); +assert(m_frame-getPicSym()-getFrameWidthInCU() == m_numCols); +const uint32_t numCols = m_numCols; const uint32_t lineStartCUAddr = row * numCols; bool bIsVbv = m_param-rc.vbvBufferSize 0 m_param-rc.vbvMaxBitrate 0; diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Mon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/frameencoder.h Mon Jun 23 17:06:02 2014 -0700 @@ -65,15 +65,15 @@ void setThreadPool(ThreadPool *p); -bool init(Encoder *top, int numRows); +bool init(Encoder *top, int numRows, int numCols); void destroy(); void processRowEncoder(int row, const int threadId); -void processRowFilter(int row) +void processRowFilter(int row, const int threadId) { -m_frameFilter.processRow(row); +m_frameFilter.processRow(row, threadId); } void 
enqueueRowEncoder(int row) @@ -108,7 +108,7 @@ } else { -processRowFilter(realRow); +processRowFilter(realRow, threadId); // NOTE: Active next row if (realRow != m_numRows - 1) @@ -154,6 +154,7 @@ bool