On Tue, Nov 17, 2020 at 8:22 AM Mahesh Pittala <mah...@multicorewareinc.com> wrote:
> From 787ae5da7431b5d113ea033cf6502ac1cc1e7572 Mon Sep 17 00:00:00 2001 > From: maheshpittala <mah...@multicorewareinc.com> > Date: Sun, 1 Nov 2020 10:09:28 +0530 > Subject: [PATCH] correct reusing cutree qp offsets in load encode for > reuse-level > 1 and < 10 for same resolution > > Earlier in save encode, dumped only best modes analysis data of that CTU > into file after encoding, not for each split CU's analysis. So in analysis > load, it reads the same best mode's qp value even for split CU's(whereas > split CU's qp would be different in save encode) and redo-analysis. > > So now, cuGeom.geomRecurId stores unique ID for each CU and even for > parents CU so based on this storing cutree qp offset and loaded same > [KS] Commit message sounds informal. Suggest rephrasing > --- > source/abrEncApp.cpp | 6 +++ > source/common/cudata.cpp | 6 ++- > source/common/cudata.h | 3 +- > source/encoder/analysis.cpp | 32 ++++++++++-- > source/encoder/api.cpp | 12 +++++ > source/encoder/encoder.cpp | 97 ++++++++++++++++++++++++++++++++---- > source/encoder/slicetype.cpp | 2 +- > source/x265.h | 2 + > 8 files changed, 140 insertions(+), 20 deletions(-) > > diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp > index cd85154f1..3550d8b11 100644 > --- a/source/abrEncApp.cpp > +++ b/source/abrEncApp.cpp > @@ -342,7 +342,10 @@ namespace X265_NS { > memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char) > * src->depthBytes); > memcpy(intraDst->chromaModes, intraSrc->chromaModes, > sizeof(uint8_t) * src->depthBytes); > if (m_param->rc.cuTree) > + { > memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, > sizeof(int8_t) * src->depthBytes); > + memcpy(intraDst->cuQPOffReuse, intraSrc->cuQPOffReuse, > sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions)); > [KS] maximum number of qps saved per CTU is 85. 
Allocating and copying numPartitions entries per CTU is unnecessary > + } > } > else > { > @@ -357,7 +360,10 @@ namespace X265_NS { > memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * > src->depthBytes); > memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * > src->depthBytes); > if (m_param->rc.cuTree) > + { > memcpy(interDst->cuQPOff, interSrc->cuQPOff, > sizeof(int8_t) * src->depthBytes); > + memcpy(interDst->cuQPOffReuse, interSrc->cuQPOffReuse, > sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions)); > + } > if (m_param->analysisSaveReuseLevel > 4) > { > memcpy(interDst->partSize, interSrc->partSize, > sizeof(uint8_t) * src->depthBytes); > diff --git a/source/common/cudata.cpp b/source/common/cudata.cpp > index 19281dee2..08cdff11a 100644 > --- a/source/common/cudata.cpp > +++ b/source/common/cudata.cpp > @@ -194,6 +194,7 @@ void CUData::initialize(const CUDataMemPool& dataPool, > uint32_t depth, const x26 > > m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; > m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions; > + m_qpreuse = (int8_t*)charBuf; charBuf += m_numPartitions; > [KS] Can you move this out of parentCTU? It would be appropriate to include it as an Analysis class member, just like the other reuse parameters such as m_reuseRef, m_reuseDepth, etc. 
> m_log2CUSize = charBuf; charBuf += m_numPartitions; > m_lumaIntraDir = charBuf; charBuf += m_numPartitions; > m_tqBypass = charBuf; charBuf += m_numPartitions; > @@ -235,6 +236,7 @@ void CUData::initialize(const CUDataMemPool& dataPool, > uint32_t depth, const x26 > > m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; > m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions; > + m_qpreuse = (int8_t*)charBuf; charBuf += m_numPartitions; > m_log2CUSize = charBuf; charBuf += m_numPartitions; > m_lumaIntraDir = charBuf; charBuf += m_numPartitions; > m_tqBypass = charBuf; charBuf += m_numPartitions; > @@ -307,7 +309,7 @@ void CUData::initCTU(const Frame& frame, uint32_t > cuAddr, int qp, uint32_t first > X265_CHECK(!(frame.m_encData->m_param->bLossless && > !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without > TQbypass in PPS\n"); > > /* initialize the remaining CU data in one memset */ > - memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? > BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions); > + memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? > BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions); > > for (int8_t i = 0; i < NUM_TU_DEPTH; i++) > m_refTuDepth[i] = -1; > @@ -358,7 +360,7 @@ void CUData::initSubCU(const CUData& ctu, const > CUGeom& cuGeom, int qp) > m_partSet(m_cuDepth, (uint8_t)cuGeom.depth); > > /* initialize the remaining CU data in one memset */ > - memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? > BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions); > + memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? 
> BytesPerPartition - 14 : BytesPerPartition - 10) * m_numPartitions); > memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); > } > > diff --git a/source/common/cudata.h b/source/common/cudata.h > index 8397f0568..d58f53e39 100644 > --- a/source/common/cudata.h > +++ b/source/common/cudata.h > @@ -192,6 +192,7 @@ public: > /* Per-part data, stored contiguously */ > int8_t* m_qp; // array of QP values > int8_t* m_qpAnalysis; // array of QP values for analysis > reuse > + int8_t* m_qpreuse; // array of QP values for analysis > reuse for reuse levels > 1 and < 10 > uint8_t* m_log2CUSize; // array of cu log2Size TODO: seems > redundant to depth > uint8_t* m_lumaIntraDir; // array of intra directions (luma) > uint8_t* m_tqBypass; // array of CU lossless flags > @@ -207,7 +208,7 @@ public: > uint8_t* m_transformSkip[3]; // array of transform skipping > flags per plane > uint8_t* m_cbf[3]; // array of coded block flags (CBF) > per plane > uint8_t* m_chromaIntraDir; // array of intra directions > (chroma) > - enum { BytesPerPartition = 24 }; // combined sizeof() of all > per-part data > + enum { BytesPerPartition = 25 }; // combined sizeof() of all > per-part data > > sse_t* m_distortion; > coeff_t* m_trCoeff[3]; // transformed coefficient buffer > per plane > diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp > index aabf386ca..b1d7e3ad1 100644 > --- a/source/encoder/analysis.cpp > +++ b/source/encoder/analysis.cpp > @@ -520,6 +520,9 @@ uint64_t Analysis::compressIntraCU(const CUData& > parentCTU, const CUGeom& cuGeom > bool mightSplit = !(cuGeom.flags & CUGeom::LEAF); > bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); > > + if (m_param->rc.cuTree) > + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp; > + > bool bAlreadyDecided = m_param->intraRefine != 4 && > parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX && > !(m_param->bAnalysisType == HEVC_INFO); > bool bDecidedDepth = m_param->intraRefine != 4 && > 
parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth; > int split = 0; > @@ -870,6 +873,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData& > parentCTU, const CUGeom& c > uint32_t minDepth = m_param->rdLevel <= 4 ? > topSkipMinDepth(parentCTU, cuGeom) : 0; > uint32_t splitRefs[4] = { 0, 0, 0, 0 }; > > + if (m_param->rc.cuTree) > + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp; > + > X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not > support RD 0 or 1\n"); > > PMODE pmode(*this, cuGeom); > @@ -1152,6 +1158,8 @@ SplitData Analysis::compressInterCU_rd0_4(const > CUData& parentCTU, const CUGeom& > uint32_t cuAddr = parentCTU.m_cuAddr; > ModeDepth& md = m_modeDepth[depth]; > > + if (m_param->rc.cuTree) > + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp; > > if (m_param->searchMethod == X265_SEA) > { > @@ -1856,6 +1864,9 @@ SplitData Analysis::compressInterCU_rd5_6(const > CUData& parentCTU, const CUGeom& > ModeDepth& md = m_modeDepth[depth]; > md.bestMode = NULL; > > + if (m_param->rc.cuTree) > + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp; > + > if (m_param->searchMethod == X265_SEA) > { > int numPredDir = m_slice->isInterP() ? 
1 : 2; > @@ -3643,15 +3654,26 @@ int Analysis::calculateQpforCuSize(const CUData& > ctu, const CUGeom& cuGeom, int3 > if ((distortionData->threshold[ctu.m_cuAddr] < 0.9 || > distortionData->threshold[ctu.m_cuAddr] > 1.1) > && distortionData->highDistortionCtuCount && > distortionData->lowDistortionCtuCount) > qp += distortionData->offset[ctu.m_cuAddr]; > - } > + } > > if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree) > { > - int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + > cuGeom.absPartIdx; > - if (ctu.m_slice->m_sliceType == I_SLICE) > - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, > (int32_t)(qp + 0.5 + > ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx])); > + if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel > == 10) > + { > + int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + > cuGeom.absPartIdx; > + if (ctu.m_slice->m_sliceType == I_SLICE) > + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, > (int32_t)(qp + 0.5 + > ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx])); > + else > + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, > (int32_t)(qp + 0.5 + > ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx])); > + } > else > - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, > (int32_t)(qp + 0.5 + > ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx])); > + { > + int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + > cuGeom.geomRecurId; > + if (ctu.m_slice->m_sliceType == I_SLICE) > + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, > (int32_t)(qp + 0.5 + > ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOffReuse[cuIdx])); > + else > + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, > (int32_t)(qp + 0.5 + > ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOffReuse[cuIdx])); > + } > [KS] Why is this reuse not applicable to reuse level 1? 
> } > if (m_param->rc.hevcAq) > { > diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp > index a986355e0..0f266d328 100644 > --- a/source/encoder/api.cpp > +++ b/source/encoder/api.cpp > @@ -825,7 +825,10 @@ void x265_alloc_analysis_data(x265_param *param, > x265_analysis_data* analysis) > CHECKED_MALLOC_ZERO(intraData->partSizes, char, > analysis->numPartitions * analysis->numCUsInFrame); > CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t, > analysis->numPartitions * analysis->numCUsInFrame); > if (param->rc.cuTree) > + { > CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, > analysis->numPartitions * analysis->numCUsInFrame); > + CHECKED_MALLOC_ZERO(intraData->cuQPOffReuse, int8_t, > analysis->numPartitions * analysis->numCUsInFrame); > + } > } > analysis->intraData = intraData; > > @@ -837,7 +840,10 @@ void x265_alloc_analysis_data(x265_param *param, > x265_analysis_data* analysis) > CHECKED_MALLOC_ZERO(interData->modes, uint8_t, > analysis->numPartitions * analysis->numCUsInFrame); > > if (param->rc.cuTree && !isMultiPassOpt) > + { > CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, > analysis->numPartitions * analysis->numCUsInFrame); > + CHECKED_MALLOC_ZERO(interData->cuQPOffReuse, int8_t, > analysis->numPartitions * analysis->numCUsInFrame); > + } > CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t, > analysis->numPartitions * analysis->numCUsInFrame); > CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t, > analysis->numPartitions * analysis->numCUsInFrame); > CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV, > analysis->numPartitions * analysis->numCUsInFrame); > @@ -919,7 +925,10 @@ void x265_free_analysis_data(x265_param *param, > x265_analysis_data* analysis) > X265_FREE((analysis->intraData)->partSizes); > X265_FREE((analysis->intraData)->chromaModes); > if (param->rc.cuTree) > + { > X265_FREE((analysis->intraData)->cuQPOff); > + X265_FREE((analysis->intraData)->cuQPOffReuse); > + } > } > X265_FREE(analysis->intraData); > 
analysis->intraData = NULL; > @@ -931,7 +940,10 @@ void x265_free_analysis_data(x265_param *param, > x265_analysis_data* analysis) > X265_FREE((analysis->interData)->depth); > X265_FREE((analysis->interData)->modes); > if (!isMultiPassOpt && param->rc.cuTree) > + { > X265_FREE((analysis->interData)->cuQPOff); > + X265_FREE((analysis->interData)->cuQPOffReuse); > + } > X265_FREE((analysis->interData)->mvpIdx[0]); > X265_FREE((analysis->interData)->mvpIdx[1]); > X265_FREE((analysis->interData)->mv[0]); > diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp > index 1f710e1ce..9666744f3 100644 > --- a/source/encoder/encoder.cpp > +++ b/source/encoder/encoder.cpp > @@ -4452,19 +4452,25 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > return; > > uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, > *partSizes = NULL; > - int8_t *cuQPBuf = NULL; > + int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL; > > tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); > depthBuf = tempBuf; > modeBuf = tempBuf + depthBytes; > partSizes = tempBuf + 2 * depthBytes; > if (m_param->rc.cuTree) > + { > cuQPBuf = X265_MALLOC(int8_t, depthBytes); > + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition * > analysis->numCUsInFrame); > [KS] Check whitespaces > + } > > X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->depth); > X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->chromaModes); > X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->partSizes); > - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), > depthBytes, m_analysisFileIn, intraPic->cuQPOff); } > + if (m_param->rc.cuTree) { > + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, > m_analysisFileIn, intraPic->cuQPOff); > + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), (scaledNumPartition > * analysis->numCUsInFrame), m_analysisFileIn, intraPic->cuQPOffReuse); > + } > > size_t count = 0; 
> for (uint32_t d = 0; d < depthBytes; d++) > @@ -4484,7 +4490,11 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > memset(&(analysis->intraData)->cuQPOff[count], > cuQPBuf[d], bytes); > count += bytes; > } > - > + if (m_param->rc.cuTree) > + { > + for (uint32_t i = 0; i < (scaledNumPartition * analysis->numCUsInFrame); > i++) > + memset(&(analysis->intraData)->cuQPOffReuse[i], > cuQPReuseBuf[i], sizeof(int8_t)); > + } > if (!m_param->scaleFactor) > { > X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), > numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes); > @@ -4498,7 +4508,10 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > X265_FREE(tempLumaBuf); > } > if (m_param->rc.cuTree) > + { > X265_FREE(cuQPBuf); > + X265_FREE(cuQPReuseBuf); > + } > X265_FREE(tempBuf); > consumedBytes += frameRecordSize; > } > @@ -4515,7 +4528,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; > MV* mv[2]; > int8_t* refIdx[2]; > - int8_t* cuQPBuf = NULL; > + int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL; > [KS] Why can't we reuse cuQPBuf? I agree that the size of the offsets differs between reuse level 10 and the other levels, but that can be taken care of at allocation. > > int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2; > bool bIntraInInter = false; > @@ -4536,11 +4549,17 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > depthBuf = tempBuf; > modeBuf = tempBuf + depthBytes; > if (m_param->rc.cuTree) > + { > cuQPBuf = X265_MALLOC(int8_t, depthBytes); > + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition * > analysis->numCUsInFrame); > + } > > X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, interPic->depth); > X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, interPic->modes); > - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), > depthBytes, m_analysisFileIn, interPic->cuQPOff); } > + if (m_param->rc.cuTree) { > + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, > m_analysisFileIn, interPic->cuQPOff); > + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), > (scaledNumPartition * analysis->numCUsInFrame), m_analysisFileIn, > interPic->cuQPOffReuse); > + } > > if (m_param->analysisLoadReuseLevel > 4) > { > @@ -4611,9 +4630,17 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > } > count += bytes; > } > + if (m_param->rc.cuTree) > + { > + for (uint32_t i = 0; i < (scaledNumPartition * analysis->numCUsInFrame); > i++) > + memset(&(analysis->interData)->cuQPOffReuse[i], > cuQPReuseBuf[i], sizeof(int8_t)); > + } > > if (m_param->rc.cuTree) > + { > X265_FREE(cuQPBuf); > + X265_FREE(cuQPReuseBuf); > + } > X265_FREE(tempBuf); > } > if (m_param->analysisLoadReuseLevel == 10) > @@ -4814,19 +4841,26 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > return; > > uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, > *partSizes = NULL; > - int8_t *cuQPBuf = NULL; > + int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;; > > tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); > depthBuf = tempBuf; > modeBuf = tempBuf + depthBytes; > partSizes = tempBuf + 2 * depthBytes; > if (m_param->rc.cuTree) > + { > cuQPBuf = X265_MALLOC(int8_t, 
depthBytes); > + cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions / > factor) * analysis->numCUsInFrame); > + } > > X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->depth); > X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->chromaModes); > X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->partSizes); > - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), > depthBytes, m_analysisFileIn, intraPic->cuQPOff); } > + if (m_param->rc.cuTree) > + { > + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, > m_analysisFileIn, intraPic->cuQPOff); > + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), > ((analysis->numPartitions / factor) * analysis->numCUsInFrame), > m_analysisFileIn, intraPic->cuQPOffReuse); > + } > > uint32_t count = 0; > for (uint32_t d = 0; d < depthBytes; d++) > @@ -4869,7 +4903,10 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > } > X265_FREE(tempLumaBuf); > if (m_param->rc.cuTree) > + { > X265_FREE(cuQPBuf); > + X265_FREE(cuQPReuseBuf); > + } > X265_FREE(tempBuf); > consumedBytes += frameRecordSize; > } > @@ -4886,7 +4923,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; > MV* mv[2]; > int8_t* refIdx[2]; > - int8_t* cuQPBuf = NULL; > + int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL; > > int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2; > bool bIntraInInter = false; > @@ -4901,11 +4938,18 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > depthBuf = tempBuf; > modeBuf = tempBuf + depthBytes; > if (m_param->rc.cuTree) > + { > cuQPBuf = X265_MALLOC(int8_t, depthBytes); > + cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions / > factor) * analysis->numCUsInFrame); > + } > > X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, interPic->depth); > X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, interPic->modes); > - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), > depthBytes, m_analysisFileIn, interPic->cuQPOff); } > + if (m_param->rc.cuTree) > + { > + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, > m_analysisFileIn, interPic->cuQPOff); > + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), > (analysis->numPartitions / factor) * analysis->numCUsInFrame, > m_analysisFileIn, interPic->cuQPOffReuse); > + } > if (m_param->analysisLoadReuseLevel > 4) > { > partSize = modeBuf + depthBytes; > @@ -5017,7 +5061,16 @@ void Encoder::readAnalysisFile(x265_analysis_data* > analysis, int curPoc, const x > } > > if (m_param->rc.cuTree) > + { > + for (uint32_t i = 0; i < ((analysis->numPartitions / factor) > * analysis->numCUsInFrame); i++) > + memset(&(analysis->interData)->cuQPOffReuse[i], > cuQPReuseBuf[i], sizeof(int8_t)); > + } > + > + if (m_param->rc.cuTree) > + { > X265_FREE(cuQPBuf); > + X265_FREE(cuQPReuseBuf); > + } > X265_FREE(tempBuf); > > if (m_param->analysisLoadReuseLevel == 10) > @@ -5540,6 +5593,12 @@ void Encoder::writeAnalysisFile(x265_analysis_data* > analysis, FrameData &curEncD > intraDataCTU->cuQPOff[depthBytes] = > (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); > absPartIdx += ctu->m_numPartitions >> (depth * 2); > } > + > + if (m_param->rc.cuTree) > + { > + for (uint32_t i = (cuAddr * ctu->m_numPartitions), j > = 0; j < ctu->m_numPartitions; i++, j++) > + intraDataCTU->cuQPOffReuse[i] = > 
(int8_t)(ctu->m_qpreuse[j] - baseQP); > + } > memcpy(&intraDataCTU->modes[ctu->m_cuAddr * > ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* > ctu->m_numPartitions); > } > } > @@ -5599,13 +5658,20 @@ void > Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD > } > absPartIdx += ctu->m_numPartitions >> (depth * 2); > } > + > + if (m_param->rc.cuTree) > + { > + for (uint32_t i = (cuAddr * ctu->m_numPartitions), j > = 0; j < ctu->m_numPartitions; i++, j++) > + interDataCTU->cuQPOffReuse[i] = > (int8_t)(ctu->m_qpreuse[j] - baseQP); > + } > + > if (m_param->analysisSaveReuseLevel == 10 && > bIntraInInter) > memcpy(&intraDataCTU->modes[ctu->m_cuAddr * > ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* > ctu->m_numPartitions); > } > } > > if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType > == X265_TYPE_I) && m_param->rc.cuTree) > - analysis->frameRecordSize += sizeof(uint8_t)* > analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + > (sizeof(int8_t) * depthBytes); > + analysis->frameRecordSize += sizeof(uint8_t)* > analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + > (sizeof(int8_t) * depthBytes) + (sizeof(int8_t) * analysis->numPartitions > * analysis->numCUsInFrame); > else if (analysis->sliceType == X265_TYPE_IDR || > analysis->sliceType == X265_TYPE_I) > analysis->frameRecordSize += sizeof(uint8_t)* > analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3; > else > @@ -5613,7 +5679,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data* > analysis, FrameData &curEncD > /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */ > analysis->frameRecordSize += depthBytes * 2; > if (m_param->rc.cuTree) > - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes); > + { > + analysis->frameRecordSize += (sizeof(int8_t) * > depthBytes); > + analysis->frameRecordSize += (sizeof(int8_t) * > analysis->numPartitions * analysis->numCUsInFrame); > + } > if 
(m_param->analysisSaveReuseLevel > 4) > analysis->frameRecordSize += (depthBytes * 2); > > @@ -5669,7 +5738,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data* > analysis, FrameData &curEncD > X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t), > depthBytes, m_analysisFileOut); > X265_FWRITE((analysis->intraData)->partSizes, sizeof(char), > depthBytes, m_analysisFileOut); > if (m_param->rc.cuTree) > + { > X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), > depthBytes, m_analysisFileOut); > + X265_FWRITE((analysis->intraData)->cuQPOffReuse, > sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions), > m_analysisFileOut); > + } > X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t), > analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut); > } > else > @@ -5677,7 +5749,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data* > analysis, FrameData &curEncD > X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), > depthBytes, m_analysisFileOut); > X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t), > depthBytes, m_analysisFileOut); > if (m_param->rc.cuTree) > + { > X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), > depthBytes, m_analysisFileOut); > + X265_FWRITE((analysis->interData)->cuQPOffReuse, > sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions), > m_analysisFileOut); > + } > if (m_param->analysisSaveReuseLevel > 4) > { > X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t), > depthBytes, m_analysisFileOut); > diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp > index 0adb0d0db..3bc01268b 100644 > --- a/source/encoder/slicetype.cpp > +++ b/source/encoder/slicetype.cpp > @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, > bool bKeyframe) > > if (!framecnt) > { > - if (m_param->rc.cuTree) > + if (m_param->rc.cuTree && !m_param->analysisLoad) > cuTree(frames, 0, bKeyframe); > return; > } > diff --git a/source/x265.h 
b/source/x265.h > index f44040ba7..d6a828539 100644 > --- a/source/x265.h > +++ b/source/x265.h > @@ -145,6 +145,7 @@ typedef struct x265_analysis_intra_data > char* partSizes; > uint8_t* chromaModes; > int8_t* cuQPOff; > + int8_t* cuQPOffReuse; > }x265_analysis_intra_data; > > typedef struct x265_analysis_MV > @@ -170,6 +171,7 @@ typedef struct x265_analysis_inter_data > x265_analysis_MV* mv[2]; > int64_t* sadCost; > int8_t* cuQPOff; > + int8_t* cuQPOffReuse; > }x265_analysis_inter_data; > > typedef struct x265_weight_param > -- > 2.23.0.windows.1 > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > -- Regards, Kavitha
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel