On Fri, Nov 20, 2020 at 3:32 PM Kavitha Sampath <kavi...@multicorewareinc.com> wrote:
> > > On Tue, Nov 17, 2020 at 8:22 AM Mahesh Pittala < > mah...@multicorewareinc.com> wrote: > >> From 787ae5da7431b5d113ea033cf6502ac1cc1e7572 Mon Sep 17 00:00:00 2001 >> From: maheshpittala <mah...@multicorewareinc.com> >> Date: Sun, 1 Nov 2020 10:09:28 +0530 >> Subject: [PATCH] correct reusing cutree qp offsets in load encode for >> reuse-level > 1 and < 10 for same resolution >> >> Earlier in save encode, dumped only best modes analysis data of that CTU >> into file after encoding, not for each split CU's analysis. So in analysis >> load, it reads the same best mode's qp value even for split CU's(whereas >> split CU's qp would be different in save encode) and redo-analysis. >> >> So now, cuGeom.geomRecurId stores unique ID for each CU and even for >> parents CU so based on this storing cutree qp offset and loaded same >> > [KS] Commit message sounds informal. Suggest rephrasing > [SK] Addressed the same. > --- >> source/abrEncApp.cpp | 6 +++ >> source/common/cudata.cpp | 6 ++- >> source/common/cudata.h | 3 +- >> source/encoder/analysis.cpp | 32 ++++++++++-- >> source/encoder/api.cpp | 12 +++++ >> source/encoder/encoder.cpp | 97 ++++++++++++++++++++++++++++++++---- >> source/encoder/slicetype.cpp | 2 +- >> source/x265.h | 2 + >> 8 files changed, 140 insertions(+), 20 deletions(-) >> >> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp >> index cd85154f1..3550d8b11 100644 >> --- a/source/abrEncApp.cpp >> +++ b/source/abrEncApp.cpp >> @@ -342,7 +342,10 @@ namespace X265_NS { >> memcpy(intraDst->partSizes, intraSrc->partSizes, >> sizeof(char) * src->depthBytes); >> memcpy(intraDst->chromaModes, intraSrc->chromaModes, >> sizeof(uint8_t) * src->depthBytes); >> if (m_param->rc.cuTree) >> + { >> memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, >> sizeof(int8_t) * src->depthBytes); >> + memcpy(intraDst->cuQPOffReuse, intraSrc->cuQPOffReuse, >> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions)); >> > [KS] maximum number of qp's saved per CTU is 85. 
Allocating and copying > numPartition size is unnecessary > [SK] Agreed. Fixed the same. > + } >> } >> else >> { >> @@ -357,7 +360,10 @@ namespace X265_NS { >> memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * >> src->depthBytes); >> memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * >> src->depthBytes); >> if (m_param->rc.cuTree) >> + { >> memcpy(interDst->cuQPOff, interSrc->cuQPOff, >> sizeof(int8_t) * src->depthBytes); >> + memcpy(interDst->cuQPOffReuse, interSrc->cuQPOffReuse, >> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions)); >> + } >> if (m_param->analysisSaveReuseLevel > 4) >> { >> memcpy(interDst->partSize, interSrc->partSize, >> sizeof(uint8_t) * src->depthBytes); >> diff --git a/source/common/cudata.cpp b/source/common/cudata.cpp >> index 19281dee2..08cdff11a 100644 >> --- a/source/common/cudata.cpp >> +++ b/source/common/cudata.cpp >> @@ -194,6 +194,7 @@ void CUData::initialize(const CUDataMemPool& >> dataPool, uint32_t depth, const x26 >> >> m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; >> m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions; >> + m_qpreuse = (int8_t*)charBuf; charBuf += m_numPartitions; >> > [KS] Can you move this out of parentCTU? Would be appropriate to include > it as an Analysis class member - just like other reuse parameters such > as m_reuseRef, m_reuseDepth, etc. > [SK] Addressed the same so that cudata mem pool can be used for other purposes. We will store the offsets only in the frame's analysis data structures. 
> > >> m_log2CUSize = charBuf; charBuf += m_numPartitions; >> m_lumaIntraDir = charBuf; charBuf += m_numPartitions; >> m_tqBypass = charBuf; charBuf += m_numPartitions; >> @@ -235,6 +236,7 @@ void CUData::initialize(const CUDataMemPool& >> dataPool, uint32_t depth, const x26 >> >> m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; >> m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions; >> + m_qpreuse = (int8_t*)charBuf; charBuf += m_numPartitions; >> m_log2CUSize = charBuf; charBuf += m_numPartitions; >> m_lumaIntraDir = charBuf; charBuf += m_numPartitions; >> m_tqBypass = charBuf; charBuf += m_numPartitions; >> @@ -307,7 +309,7 @@ void CUData::initCTU(const Frame& frame, uint32_t >> cuAddr, int qp, uint32_t first >> X265_CHECK(!(frame.m_encData->m_param->bLossless && >> !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without >> TQbypass in PPS\n"); >> >> /* initialize the remaining CU data in one memset */ >> - memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? >> BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions); >> + memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? >> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions); >> >> for (int8_t i = 0; i < NUM_TU_DEPTH; i++) >> m_refTuDepth[i] = -1; >> @@ -358,7 +360,7 @@ void CUData::initSubCU(const CUData& ctu, const >> CUGeom& cuGeom, int qp) >> m_partSet(m_cuDepth, (uint8_t)cuGeom.depth); >> >> /* initialize the remaining CU data in one memset */ >> - memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? >> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions); >> + memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? 
>> BytesPerPartition - 14 : BytesPerPartition - 10) * m_numPartitions); >> memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); >> } >> >> diff --git a/source/common/cudata.h b/source/common/cudata.h >> index 8397f0568..d58f53e39 100644 >> --- a/source/common/cudata.h >> +++ b/source/common/cudata.h >> @@ -192,6 +192,7 @@ public: >> /* Per-part data, stored contiguously */ >> int8_t* m_qp; // array of QP values >> int8_t* m_qpAnalysis; // array of QP values for analysis >> reuse >> + int8_t* m_qpreuse; // array of QP values for analysis >> reuse for reuse levels > 1 and < 10 >> uint8_t* m_log2CUSize; // array of cu log2Size TODO: >> seems redundant to depth >> uint8_t* m_lumaIntraDir; // array of intra directions (luma) >> uint8_t* m_tqBypass; // array of CU lossless flags >> @@ -207,7 +208,7 @@ public: >> uint8_t* m_transformSkip[3]; // array of transform skipping >> flags per plane >> uint8_t* m_cbf[3]; // array of coded block flags >> (CBF) per plane >> uint8_t* m_chromaIntraDir; // array of intra directions >> (chroma) >> - enum { BytesPerPartition = 24 }; // combined sizeof() of all >> per-part data >> + enum { BytesPerPartition = 25 }; // combined sizeof() of all >> per-part data >> >> sse_t* m_distortion; >> coeff_t* m_trCoeff[3]; // transformed coefficient buffer >> per plane >> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp >> index aabf386ca..b1d7e3ad1 100644 >> --- a/source/encoder/analysis.cpp >> +++ b/source/encoder/analysis.cpp >> @@ -520,6 +520,9 @@ uint64_t Analysis::compressIntraCU(const CUData& >> parentCTU, const CUGeom& cuGeom >> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF); >> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); >> >> + if (m_param->rc.cuTree) >> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp; >> + >> bool bAlreadyDecided = m_param->intraRefine != 4 && >> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX && >> !(m_param->bAnalysisType == HEVC_INFO); >> bool 
bDecidedDepth = m_param->intraRefine != 4 && >> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth; >> int split = 0; >> @@ -870,6 +873,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData& >> parentCTU, const CUGeom& c >> uint32_t minDepth = m_param->rdLevel <= 4 ? >> topSkipMinDepth(parentCTU, cuGeom) : 0; >> uint32_t splitRefs[4] = { 0, 0, 0, 0 }; >> >> + if (m_param->rc.cuTree) >> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp; >> + >> X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not >> support RD 0 or 1\n"); >> >> PMODE pmode(*this, cuGeom); >> @@ -1152,6 +1158,8 @@ SplitData Analysis::compressInterCU_rd0_4(const >> CUData& parentCTU, const CUGeom& >> uint32_t cuAddr = parentCTU.m_cuAddr; >> ModeDepth& md = m_modeDepth[depth]; >> >> + if (m_param->rc.cuTree) >> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp; >> >> if (m_param->searchMethod == X265_SEA) >> { >> @@ -1856,6 +1864,9 @@ SplitData Analysis::compressInterCU_rd5_6(const >> CUData& parentCTU, const CUGeom& >> ModeDepth& md = m_modeDepth[depth]; >> md.bestMode = NULL; >> >> + if (m_param->rc.cuTree) >> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp; >> + >> if (m_param->searchMethod == X265_SEA) >> { >> int numPredDir = m_slice->isInterP() ? 
1 : 2; >> @@ -3643,15 +3654,26 @@ int Analysis::calculateQpforCuSize(const CUData& >> ctu, const CUGeom& cuGeom, int3 >> if ((distortionData->threshold[ctu.m_cuAddr] < 0.9 || >> distortionData->threshold[ctu.m_cuAddr] > 1.1) >> && distortionData->highDistortionCtuCount && >> distortionData->lowDistortionCtuCount) >> qp += distortionData->offset[ctu.m_cuAddr]; >> - } >> + } >> >> if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree) >> { >> - int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + >> cuGeom.absPartIdx; >> - if (ctu.m_slice->m_sliceType == I_SLICE) >> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >> (int32_t)(qp + 0.5 + >> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx])); >> + if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel >> == 10) >> + { >> + int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + >> cuGeom.absPartIdx; >> + if (ctu.m_slice->m_sliceType == I_SLICE) >> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >> (int32_t)(qp + 0.5 + >> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx])); >> + else >> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >> (int32_t)(qp + 0.5 + >> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx])); >> + } >> else >> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >> (int32_t)(qp + 0.5 + >> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx])); >> + { >> + int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + >> cuGeom.geomRecurId; >> + if (ctu.m_slice->m_sliceType == I_SLICE) >> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >> (int32_t)(qp + 0.5 + >> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOffReuse[cuIdx])); >> + else >> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >> (int32_t)(qp + 0.5 + >> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOffReuse[cuIdx])); >> + } >> > [KS] 
Why is this reuse not applicable to reuse level 1? > [SK] Not sure of the improvements or gain in this case. Since this is a general question, we will be tracking this and other improvements possible for multipass encoding as a separate action item under x265-Story - 1059. > } >> if (m_param->rc.hevcAq) >> { >> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp >> index a986355e0..0f266d328 100644 >> --- a/source/encoder/api.cpp >> +++ b/source/encoder/api.cpp >> @@ -825,7 +825,10 @@ void x265_alloc_analysis_data(x265_param *param, >> x265_analysis_data* analysis) >> CHECKED_MALLOC_ZERO(intraData->partSizes, char, >> analysis->numPartitions * analysis->numCUsInFrame); >> CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t, >> analysis->numPartitions * analysis->numCUsInFrame); >> if (param->rc.cuTree) >> + { >> CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, >> analysis->numPartitions * analysis->numCUsInFrame); >> + CHECKED_MALLOC_ZERO(intraData->cuQPOffReuse, int8_t, >> analysis->numPartitions * analysis->numCUsInFrame); >> + } >> } >> analysis->intraData = intraData; >> >> @@ -837,7 +840,10 @@ void x265_alloc_analysis_data(x265_param *param, >> x265_analysis_data* analysis) >> CHECKED_MALLOC_ZERO(interData->modes, uint8_t, >> analysis->numPartitions * analysis->numCUsInFrame); >> >> if (param->rc.cuTree && !isMultiPassOpt) >> + { >> CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, >> analysis->numPartitions * analysis->numCUsInFrame); >> + CHECKED_MALLOC_ZERO(interData->cuQPOffReuse, int8_t, >> analysis->numPartitions * analysis->numCUsInFrame); >> + } >> CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t, >> analysis->numPartitions * analysis->numCUsInFrame); >> CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t, >> analysis->numPartitions * analysis->numCUsInFrame); >> CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV, >> analysis->numPartitions * analysis->numCUsInFrame); >> @@ -919,7 +925,10 @@ void x265_free_analysis_data(x265_param *param, >> 
x265_analysis_data* analysis) >> X265_FREE((analysis->intraData)->partSizes); >> X265_FREE((analysis->intraData)->chromaModes); >> if (param->rc.cuTree) >> + { >> X265_FREE((analysis->intraData)->cuQPOff); >> + X265_FREE((analysis->intraData)->cuQPOffReuse); >> + } >> } >> X265_FREE(analysis->intraData); >> analysis->intraData = NULL; >> @@ -931,7 +940,10 @@ void x265_free_analysis_data(x265_param *param, >> x265_analysis_data* analysis) >> X265_FREE((analysis->interData)->depth); >> X265_FREE((analysis->interData)->modes); >> if (!isMultiPassOpt && param->rc.cuTree) >> + { >> X265_FREE((analysis->interData)->cuQPOff); >> + X265_FREE((analysis->interData)->cuQPOffReuse); >> + } >> X265_FREE((analysis->interData)->mvpIdx[0]); >> X265_FREE((analysis->interData)->mvpIdx[1]); >> X265_FREE((analysis->interData)->mv[0]); >> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp >> index 1f710e1ce..9666744f3 100644 >> --- a/source/encoder/encoder.cpp >> +++ b/source/encoder/encoder.cpp >> @@ -4452,19 +4452,25 @@ void >> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >> return; >> >> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >> *partSizes = NULL; >> - int8_t *cuQPBuf = NULL; >> + int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL; >> >> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); >> depthBuf = tempBuf; >> modeBuf = tempBuf + depthBytes; >> partSizes = tempBuf + 2 * depthBytes; >> if (m_param->rc.cuTree) >> + { >> cuQPBuf = X265_MALLOC(int8_t, depthBytes); >> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition * >> analysis->numCUsInFrame); >> > [KS] Check whitespaces > >> + } >> >> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, intraPic->depth); >> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, intraPic->chromaModes); >> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, intraPic->partSizes); >> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, 
sizeof(int8_t), >> depthBytes, m_analysisFileIn, intraPic->cuQPOff); } >> + if (m_param->rc.cuTree) { >> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, >> m_analysisFileIn, intraPic->cuQPOff); >> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), (scaledNumPartition >> * analysis->numCUsInFrame), m_analysisFileIn, intraPic->cuQPOffReuse); >> + } >> >> size_t count = 0; >> for (uint32_t d = 0; d < depthBytes; d++) >> @@ -4484,7 +4490,11 @@ void Encoder::readAnalysisFile(x265_analysis_data* >> analysis, int curPoc, const x >> memset(&(analysis->intraData)->cuQPOff[count], >> cuQPBuf[d], bytes); >> count += bytes; >> } >> - >> + if (m_param->rc.cuTree) >> + { >> + for (uint32_t i = 0; i < (scaledNumPartition * >> analysis->numCUsInFrame); i++) >> + memset(&(analysis->intraData)->cuQPOffReuse[i], >> cuQPReuseBuf[i], sizeof(int8_t)); >> + } >> if (!m_param->scaleFactor) >> { >> X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), >> numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes); >> @@ -4498,7 +4508,10 @@ void Encoder::readAnalysisFile(x265_analysis_data* >> analysis, int curPoc, const x >> X265_FREE(tempLumaBuf); >> } >> if (m_param->rc.cuTree) >> + { >> X265_FREE(cuQPBuf); >> + X265_FREE(cuQPReuseBuf); >> + } >> X265_FREE(tempBuf); >> consumedBytes += frameRecordSize; >> } >> @@ -4515,7 +4528,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* >> analysis, int curPoc, const x >> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; >> MV* mv[2]; >> int8_t* refIdx[2]; >> - int8_t* cuQPBuf = NULL; >> + int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL; >> > [KS] Why can't we reuse cuQPBuf? I agree that the size of offsets differs > for reuse level 10 and others but that can be taken care of in allocation. > [SK] We can use the same and also use the same buffer in analysis data for all reuse levels. Hence, we optimized the memory footprint per frame. > >> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2; >> bool bIntraInInter = false; >> @@ -4536,11 +4549,17 @@ void >> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >> depthBuf = tempBuf; >> modeBuf = tempBuf + depthBytes; >> if (m_param->rc.cuTree) >> + { >> cuQPBuf = X265_MALLOC(int8_t, depthBytes); >> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition * >> analysis->numCUsInFrame); >> + } >> >> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, interPic->depth); >> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, interPic->modes); >> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, >> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); } >> + if (m_param->rc.cuTree) { >> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, >> m_analysisFileIn, interPic->cuQPOff); >> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), >> (scaledNumPartition * analysis->numCUsInFrame), m_analysisFileIn, >> interPic->cuQPOffReuse); >> + } >> >> if (m_param->analysisLoadReuseLevel > 4) >> { >> @@ -4611,9 +4630,17 @@ void Encoder::readAnalysisFile(x265_analysis_data* >> analysis, int curPoc, const x >> } >> count += bytes; >> } >> + if (m_param->rc.cuTree) >> + { >> + for (uint32_t i = 0; i < (scaledNumPartition * >> analysis->numCUsInFrame); i++) >> + memset(&(analysis->interData)->cuQPOffReuse[i], >> cuQPReuseBuf[i], sizeof(int8_t)); >> + } >> >> if (m_param->rc.cuTree) >> + { >> X265_FREE(cuQPBuf); >> + X265_FREE(cuQPReuseBuf); >> + } >> X265_FREE(tempBuf); >> } >> if (m_param->analysisLoadReuseLevel == 10) >> @@ -4814,19 +4841,26 @@ void >> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >> return; >> >> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >> *partSizes = NULL; >> - int8_t *cuQPBuf = NULL; >> + int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;; >> >> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); >> depthBuf = tempBuf; >> modeBuf = tempBuf + depthBytes; >> partSizes = tempBuf + 2 * depthBytes; >> if 
(m_param->rc.cuTree) >> + { >> cuQPBuf = X265_MALLOC(int8_t, depthBytes); >> + cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions >> / factor) * analysis->numCUsInFrame); >> + } >> >> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, intraPic->depth); >> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, intraPic->chromaModes); >> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, intraPic->partSizes); >> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >> depthBytes, m_analysisFileIn, intraPic->cuQPOff); } >> + if (m_param->rc.cuTree) >> + { >> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, >> m_analysisFileIn, intraPic->cuQPOff); >> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), >> ((analysis->numPartitions / factor) * analysis->numCUsInFrame), >> m_analysisFileIn, intraPic->cuQPOffReuse); >> + } >> >> uint32_t count = 0; >> for (uint32_t d = 0; d < depthBytes; d++) >> @@ -4869,7 +4903,10 @@ void Encoder::readAnalysisFile(x265_analysis_data* >> analysis, int curPoc, const x >> } >> X265_FREE(tempLumaBuf); >> if (m_param->rc.cuTree) >> + { >> X265_FREE(cuQPBuf); >> + X265_FREE(cuQPReuseBuf); >> + } >> X265_FREE(tempBuf); >> consumedBytes += frameRecordSize; >> } >> @@ -4886,7 +4923,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* >> analysis, int curPoc, const x >> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; >> MV* mv[2]; >> int8_t* refIdx[2]; >> - int8_t* cuQPBuf = NULL; >> + int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL; >> >> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2; >> bool bIntraInInter = false; >> @@ -4901,11 +4938,18 @@ void >> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >> depthBuf = tempBuf; >> modeBuf = tempBuf + depthBytes; >> if (m_param->rc.cuTree) >> + { >> cuQPBuf = X265_MALLOC(int8_t, depthBytes); >> + cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions >> / factor) * analysis->numCUsInFrame); >> + } >> >> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, interPic->depth); >> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >> m_analysisFileIn, interPic->modes); >> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >> depthBytes, m_analysisFileIn, interPic->cuQPOff); } >> + if (m_param->rc.cuTree) >> + { >> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, >> m_analysisFileIn, interPic->cuQPOff); >> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), >> (analysis->numPartitions / factor) * analysis->numCUsInFrame, >> m_analysisFileIn, interPic->cuQPOffReuse); >> + } >> if (m_param->analysisLoadReuseLevel > 4) >> { >> partSize = modeBuf + depthBytes; >> @@ -5017,7 +5061,16 @@ void Encoder::readAnalysisFile(x265_analysis_data* >> analysis, int curPoc, const x >> } >> >> if (m_param->rc.cuTree) >> + { >> + for (uint32_t i = 0; i < ((analysis->numPartitions / factor) >> * analysis->numCUsInFrame); i++) >> + memset(&(analysis->interData)->cuQPOffReuse[i], >> cuQPReuseBuf[i], sizeof(int8_t)); >> + } >> + >> + if (m_param->rc.cuTree) >> + { >> X265_FREE(cuQPBuf); >> + X265_FREE(cuQPReuseBuf); >> + } >> X265_FREE(tempBuf); >> >> if (m_param->analysisLoadReuseLevel == 10) >> @@ -5540,6 +5593,12 @@ void >> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >> intraDataCTU->cuQPOff[depthBytes] = >> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); >> absPartIdx += ctu->m_numPartitions >> (depth * 2); >> } >> + >> + if (m_param->rc.cuTree) >> + { >> + for (uint32_t i = (cuAddr * ctu->m_numPartitions), j >> = 0; j < 
ctu->m_numPartitions; i++, j++) >> + intraDataCTU->cuQPOffReuse[i] = >> (int8_t)(ctu->m_qpreuse[j] - baseQP); >> + } >> memcpy(&intraDataCTU->modes[ctu->m_cuAddr * >> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* >> ctu->m_numPartitions); >> } >> } >> @@ -5599,13 +5658,20 @@ void >> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >> } >> absPartIdx += ctu->m_numPartitions >> (depth * 2); >> } >> + >> + if (m_param->rc.cuTree) >> + { >> + for (uint32_t i = (cuAddr * ctu->m_numPartitions), j >> = 0; j < ctu->m_numPartitions; i++, j++) >> + interDataCTU->cuQPOffReuse[i] = >> (int8_t)(ctu->m_qpreuse[j] - baseQP); >> + } >> + >> if (m_param->analysisSaveReuseLevel == 10 && >> bIntraInInter) >> memcpy(&intraDataCTU->modes[ctu->m_cuAddr * >> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* >> ctu->m_numPartitions); >> } >> } >> >> if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType >> == X265_TYPE_I) && m_param->rc.cuTree) >> - analysis->frameRecordSize += sizeof(uint8_t)* >> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + >> (sizeof(int8_t) * depthBytes); >> + analysis->frameRecordSize += sizeof(uint8_t)* >> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + >> (sizeof(int8_t) * depthBytes) + (sizeof(int8_t) * analysis->numPartitions >> * analysis->numCUsInFrame); >> else if (analysis->sliceType == X265_TYPE_IDR || >> analysis->sliceType == X265_TYPE_I) >> analysis->frameRecordSize += sizeof(uint8_t)* >> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3; >> else >> @@ -5613,7 +5679,10 @@ void >> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >> /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag >> */ >> analysis->frameRecordSize += depthBytes * 2; >> if (m_param->rc.cuTree) >> - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes); >> + { >> + analysis->frameRecordSize += (sizeof(int8_t) * >> depthBytes); 
>> + analysis->frameRecordSize += (sizeof(int8_t) * >> analysis->numPartitions * analysis->numCUsInFrame); >> + } >> if (m_param->analysisSaveReuseLevel > 4) >> analysis->frameRecordSize += (depthBytes * 2); >> >> @@ -5669,7 +5738,10 @@ void >> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >> X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t), >> depthBytes, m_analysisFileOut); >> X265_FWRITE((analysis->intraData)->partSizes, sizeof(char), >> depthBytes, m_analysisFileOut); >> if (m_param->rc.cuTree) >> + { >> X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), >> depthBytes, m_analysisFileOut); >> + X265_FWRITE((analysis->intraData)->cuQPOffReuse, >> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions), >> m_analysisFileOut); >> + } >> X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t), >> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut); >> } >> else >> @@ -5677,7 +5749,10 @@ void >> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >> X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), >> depthBytes, m_analysisFileOut); >> X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t), >> depthBytes, m_analysisFileOut); >> if (m_param->rc.cuTree) >> + { >> X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), >> depthBytes, m_analysisFileOut); >> + X265_FWRITE((analysis->interData)->cuQPOffReuse, >> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions), >> m_analysisFileOut); >> + } >> if (m_param->analysisSaveReuseLevel > 4) >> { >> X265_FWRITE((analysis->interData)->partSize, >> sizeof(uint8_t), depthBytes, m_analysisFileOut); >> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp >> index 0adb0d0db..3bc01268b 100644 >> --- a/source/encoder/slicetype.cpp >> +++ b/source/encoder/slicetype.cpp >> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, >> bool bKeyframe) >> >> if 
(!framecnt) >> { >> - if (m_param->rc.cuTree) >> + if (m_param->rc.cuTree && !m_param->analysisLoad) >> cuTree(frames, 0, bKeyframe); >> return; >> } >> diff --git a/source/x265.h b/source/x265.h >> index f44040ba7..d6a828539 100644 >> --- a/source/x265.h >> +++ b/source/x265.h >> @@ -145,6 +145,7 @@ typedef struct x265_analysis_intra_data >> char* partSizes; >> uint8_t* chromaModes; >> int8_t* cuQPOff; >> + int8_t* cuQPOffReuse; >> }x265_analysis_intra_data; >> >> typedef struct x265_analysis_MV >> @@ -170,6 +171,7 @@ typedef struct x265_analysis_inter_data >> x265_analysis_MV* mv[2]; >> int64_t* sadCost; >> int8_t* cuQPOff; >> + int8_t* cuQPOffReuse; >> }x265_analysis_inter_data; >> >> typedef struct x265_weight_param >> -- >> 2.23.0.windows.1 >> >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel >> > > > -- > Regards, > Kavitha > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > -- *With Regards,* *Srikanth Kurapati.*
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel