On Thu, Jan 21, 2021 at 5:34 PM Srikanth Kurapati < srikanth.kurap...@multicorewareinc.com> wrote:
> > Adding to my reply above. > > [AM] Why MAX_NUM_CU_GEOMS combinations? > > [KS] Will optimize storage based on min-cu-size configuration. > > On Thu, Jan 21, 2021 at 4:09 PM Srikanth Kurapati < > srikanth.kurap...@multicorewareinc.com> wrote: > >> >> [AM] Can't we share lowres cutree stats generated at qg size granularity? >> Why MAX_NUM_CU_GEOMS combinations? >> >> [KS] If we share like that then we will have to calculate the dqp per cu >> at analysis phase just like save encode and we will not get the savings in >> cpu cycles there. Currently we are storing the final dqp derived from >> lowres mv costs at qg size granularity by taking the difference between the >> final qp and base qp per slice. >> > [AM] What is the memory footprint and performance impact of 1. Sharing cutree offsets per qg and collating CU-level offsets from qg-level offsets, and 2. Sharing cu-tree offsets of all partition sizes? I don't think #1 will have a significant hit on performance as the partition evaluations in load encode are restricted. > MAX_NUM_CU_GEOMS is 85 = ( 1 + 4 + 16 + 64 ); this is the maximum number of >> partitions at which qp can be computed and used in a ctu. >> >> [AM] Won't this implicitly turn OFF cutree at reuse-level 1? >> >> [KS] Agreed and addressed. >> >> >> On Tue, Jan 19, 2021 at 11:12 PM Aruna Matheswaran < >> ar...@multicorewareinc.com> wrote: >> >>> >>> >>> On Mon, Jan 11, 2021 at 8:08 PM Srikanth Kurapati < >>> srikanth.kurap...@multicorewareinc.com> wrote: >>> >>>> From d516d0564888e154d88d89320302725d87bfab78 Mon Sep 17 00:00:00 2001 >>>> From: Srikanth Kurapati <srikanth.kurap...@multicorewareinc.com> >>>> Date: Wed, 30 Dec 2020 17:00:08 +0530 >>>> Subject: [PATCH] fix: corrects output mismatch for cutree enabled >>>> analysis >>>> save/load encodes with reuse-levels in between 1 to 10 for similar >>>> encoder >>>> settings. 
>>>> >>>> --- >>>> source/abrEncApp.cpp | 14 +++- >>>> source/common/common.h | 3 +- >>>> source/common/cudata.h | 2 +- >>>> source/encoder/analysis.cpp | 31 ++++++++- >>>> source/encoder/analysis.h | 1 + >>>> source/encoder/api.cpp | 28 +++++++- >>>> source/encoder/encoder.cpp | 123 ++++++++++++++++++++++++++--------- >>>> source/encoder/slicetype.cpp | 2 +- >>>> source/x265.h | 4 +- >>>> 9 files changed, 166 insertions(+), 42 deletions(-) >>>> >>>> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp >>>> index fa62ebf63..ea255e3f6 100644 >>>> --- a/source/abrEncApp.cpp >>>> +++ b/source/abrEncApp.cpp >>>> @@ -340,7 +340,12 @@ namespace X265_NS { >>>> memcpy(intraDst->partSizes, intraSrc->partSizes, >>>> sizeof(char) * src->depthBytes); >>>> memcpy(intraDst->chromaModes, intraSrc->chromaModes, >>>> sizeof(uint8_t) * src->depthBytes); >>>> if (m_param->rc.cuTree) >>>> - memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, >>>> sizeof(int8_t) * src->depthBytes); >>>> + { >>>> + if (m_param->analysisSaveReuseLevel == 10) >>>> + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, >>>> sizeof(int8_t) * src->depthBytes); >>>> + else >>>> + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, >>>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS)); >>>> + } >>>> } >>>> else >>>> { >>>> @@ -355,7 +360,12 @@ namespace X265_NS { >>>> memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * >>>> src->depthBytes); >>>> memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * >>>> src->depthBytes); >>>> if (m_param->rc.cuTree) >>>> - memcpy(interDst->cuQPOff, interSrc->cuQPOff, >>>> sizeof(int8_t) * src->depthBytes); >>>> + { >>>> + if (m_param->analysisReuseLevel == 10) >>>> + memcpy(interDst->cuQPOff, interSrc->cuQPOff, >>>> sizeof(int8_t) * src->depthBytes); >>>> + else >>>> + memcpy(interDst->cuQPOff, interSrc->cuQPOff, >>>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS)); >>>> + } >>>> if (m_param->analysisSaveReuseLevel > 4) >>>> { >>>> memcpy(interDst->partSize, 
interSrc->partSize, >>>> sizeof(uint8_t) * src->depthBytes); >>>> diff --git a/source/common/common.h b/source/common/common.h >>>> index 8c06cd79e..0ffbf17eb 100644 >>>> --- a/source/common/common.h >>>> +++ b/source/common/common.h >>>> @@ -326,7 +326,8 @@ typedef int16_t coeff_t; // transform >>>> coefficient >>>> >>>> #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422) >>>> #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420) >>>> -#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8 >>>> +#define MAX_NUM_CU_GEOMS 85 >>>> +#define X265_MAX_PRED_MODE_PER_CTU MAX_NUM_CU_GEOMS * 2 * 8 >>>> >>>> #define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // >>>> Maximum number of transform coefficients, for a 32x32 transform >>>> #define MAX_NUM_TR_CATEGORIES 16 // 32, >>>> 16, 8, 4 transform categories each for luma and chroma >>>> diff --git a/source/common/cudata.h b/source/common/cudata.h >>>> index 8397f0568..c7d9a1972 100644 >>>> --- a/source/common/cudata.h >>>> +++ b/source/common/cudata.h >>>> @@ -371,7 +371,7 @@ struct CUDataMemPool >>>> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) * >>>> numInstances); >>>> } >>>> else >>>> - { >>>> + { >>>> uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + >>>> CHROMA_V_SHIFT(csp)); >>>> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * >>>> 2) * numInstances); >>>> } >>>> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp >>>> index aabf386ca..22a4ba74f 100644 >>>> --- a/source/encoder/analysis.cpp >>>> +++ b/source/encoder/analysis.cpp >>>> @@ -220,6 +220,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& >>>> frame, const CUGeom& cuGeom, con >>>> if (m_param->analysisSave && !m_param->analysisLoad) >>>> for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * >>>> numPredDir; i++) >>>> m_reuseRef[i] = -1; >>>> + >>>> + if (m_param->rc.cuTree) >>>> + m_reuseQP = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr * >>>> MAX_NUM_CU_GEOMS]; >>>> } >>>> ProfileCUScope(ctu, totalCTUTime, totalCTUs); >>>> >>>> 
@@ -233,6 +236,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& >>>> frame, const CUGeom& cuGeom, con >>>> memcpy(ctu.m_partSize, >>>> &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) * >>>> numPartition); >>>> memcpy(ctu.m_chromaIntraDir, >>>> &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * >>>> numPartition); >>>> } >>>> + if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10) >>>> + m_reuseQP = &intraDataCTU->cuQPOff[ctu.m_cuAddr * >>>> MAX_NUM_CU_GEOMS]; >>>> compressIntraCU(ctu, cuGeom, qp); >>>> } >>>> else >>>> @@ -520,6 +525,9 @@ uint64_t Analysis::compressIntraCU(const CUData& >>>> parentCTU, const CUGeom& cuGeom >>>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF); >>>> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); >>>> >>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && >>>> m_param->analysisSaveReuseLevel < 10) >>>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; >>>> >>> + >>>> bool bAlreadyDecided = m_param->intraRefine != 4 && >>>> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX && >>>> !(m_param->bAnalysisType == HEVC_INFO); >>>> bool bDecidedDepth = m_param->intraRefine != 4 && >>>> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth; >>>> int split = 0; >>>> @@ -870,6 +878,9 @@ uint32_t Analysis::compressInterCU_dist(const >>>> CUData& parentCTU, const CUGeom& c >>>> uint32_t minDepth = m_param->rdLevel <= 4 ? 
>>>> topSkipMinDepth(parentCTU, cuGeom) : 0; >>>> uint32_t splitRefs[4] = { 0, 0, 0, 0 }; >>>> >>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && >>>> m_param->analysisSaveReuseLevel < 10) >>>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; >>>> + >>>> X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not >>>> support RD 0 or 1\n"); >>>> >>>> PMODE pmode(*this, cuGeom); >>>> @@ -1152,6 +1163,8 @@ SplitData Analysis::compressInterCU_rd0_4(const >>>> CUData& parentCTU, const CUGeom& >>>> uint32_t cuAddr = parentCTU.m_cuAddr; >>>> ModeDepth& md = m_modeDepth[depth]; >>>> >>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && >>>> m_param->analysisSaveReuseLevel < 10) >>>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; >>>> >>>> if (m_param->searchMethod == X265_SEA) >>>> { >>>> @@ -1856,6 +1869,9 @@ SplitData Analysis::compressInterCU_rd5_6(const >>>> CUData& parentCTU, const CUGeom& >>>> ModeDepth& md = m_modeDepth[depth]; >>>> md.bestMode = NULL; >>>> >>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && >>>> m_param->analysisSaveReuseLevel < 10) >>>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; >>>> + >>>> if (m_param->searchMethod == X265_SEA) >>>> { >>>> int numPredDir = m_slice->isInterP() ? 
1 : 2; >>>> @@ -3647,11 +3663,20 @@ int Analysis::calculateQpforCuSize(const >>>> CUData& ctu, const CUGeom& cuGeom, int3 >>>> >>>> if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree) >>>> { >>>> - int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + >>>> cuGeom.absPartIdx; >>>> + int cuIdx; >>>> + int8_t cuQPOffSet = 0; >>>> + >>>> + if (m_param->scaleFactor == 2 || >>>> m_param->analysisLoadReuseLevel == 10) >>>> + cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + >>>> cuGeom.absPartIdx; >>>> + else >>>> + cuIdx = (ctu.m_cuAddr * MAX_NUM_CU_GEOMS) + >>>> cuGeom.geomRecurId; >>>> + >>>> if (ctu.m_slice->m_sliceType == I_SLICE) >>>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >>>> (int32_t)(qp + 0.5 + >>>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx])); >>>> + cuQPOffSet = >>>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]; >>>> else >>>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >>>> (int32_t)(qp + 0.5 + >>>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx])); >>>> + cuQPOffSet = >>>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]; >>>> + >>>> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >>>> (int32_t)(qp + 0.5 + cuQPOffSet)); >>>> } >>>> if (m_param->rc.hevcAq) >>>> { >>>> diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h >>>> index 3bcb56bc3..8d76d5c5e 100644 >>>> --- a/source/encoder/analysis.h >>>> +++ b/source/encoder/analysis.h >>>> @@ -126,6 +126,7 @@ protected: >>>> int32_t* m_reuseRef; >>>> uint8_t* m_reuseDepth; >>>> uint8_t* m_reuseModes; >>>> + int8_t * m_reuseQP; // array of QP values for >>>> analysis reuse at reuse levels > 1 and < 10 when cutree is enabled >>>> uint8_t* m_reusePartSize; >>>> uint8_t* m_reuseMergeFlag; >>>> x265_analysis_MV* m_reuseMv[2]; >>>> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp >>>> index a986355e0..2c90fe8f2 
100644 >>>> --- a/source/encoder/api.cpp >>>> +++ b/source/encoder/api.cpp >>>> @@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param, >>>> x265_analysis_data* analysis) >>>> CHECKED_MALLOC_ZERO(intraData->partSizes, char, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> if (param->rc.cuTree) >>>> - CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> + { >>>> + if (maxReuseLevel == 10) >>>> + { >>>> + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> + } >>>> + else >>>> + { >>>> + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, >>>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame); >>>> + } >>>> + } >>>> } >>>> analysis->intraData = intraData; >>>> >>>> @@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param, >>>> x265_analysis_data* analysis) >>>> CHECKED_MALLOC_ZERO(interData->modes, uint8_t, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> >>>> if (param->rc.cuTree && !isMultiPassOpt) >>>> - CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> + { >>>> + if (maxReuseLevel == 10) >>>> + { >>>> + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> + } >>>> + else >>>> + { >>>> + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, >>>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame); >>>> >>> [AM] Can't we share lowres cutree stats generated at qg size >>> granularity? Why MAX_NUM_CU_GEOMS combinations? 
>>> >>>> + } >>>> + } >>>> CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV, >>>> analysis->numPartitions * analysis->numCUsInFrame); >>>> @@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param, >>>> x265_analysis_data* analysis) >>>> X265_FREE((analysis->intraData)->partSizes); >>>> X265_FREE((analysis->intraData)->chromaModes); >>>> if (param->rc.cuTree) >>>> - X265_FREE((analysis->intraData)->cuQPOff); >>>> + { >>>> + X265_FREE_ZERO((analysis->intraData)->cuQPOff); >>>> + } >>>> } >>>> X265_FREE(analysis->intraData); >>>> analysis->intraData = NULL; >>>> @@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param, >>>> x265_analysis_data* analysis) >>>> X265_FREE((analysis->interData)->depth); >>>> X265_FREE((analysis->interData)->modes); >>>> if (!isMultiPassOpt && param->rc.cuTree) >>>> + { >>>> X265_FREE((analysis->interData)->cuQPOff); >>>> + } >>>> X265_FREE((analysis->interData)->mvpIdx[0]); >>>> X265_FREE((analysis->interData)->mvpIdx[1]); >>>> X265_FREE((analysis->interData)->mv[0]); >>>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp >>>> index 1f710e1ce..5eb123d31 100644 >>>> --- a/source/encoder/encoder.cpp >>>> +++ b/source/encoder/encoder.cpp >>>> @@ -4444,6 +4444,26 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> } >>>> } >>>> } >>>> + >>>> + int8_t *cuQPBuf = NULL, *cuQPOffSets = NULL; >>>> + uint32_t reuseBufSize = 0; >>>> + >>>> + if (m_param->rc.cuTree) >>>> + { >>>> + if (m_param->analysisLoadReuseLevel == 10) >>>> + reuseBufSize = depthBytes; >>>> + else if (m_param->analysisLoadReuseLevel > 1) >>>> + reuseBufSize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame; >>>> + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize); >>>> + if 
(!m_param->bUseAnalysisFile) >>>> + { >>>> + if (analysis->sliceType == X265_TYPE_IDR || >>>> analysis->sliceType == X265_TYPE_I) >>>> + cuQPOffSets = intraPic->cuQPOff; >>>> + else >>>> + cuQPOffSets = interPic->cuQPOff; >>>> + } >>>> + } >>>> + >>>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == >>>> X265_TYPE_I) >>>> { >>>> if (m_param->bAnalysisType == HEVC_INFO) >>>> @@ -4452,19 +4472,21 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> return; >>>> >>>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >>>> *partSizes = NULL; >>>> - int8_t *cuQPBuf = NULL; >>>> >>>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); >>>> depthBuf = tempBuf; >>>> modeBuf = tempBuf + depthBytes; >>>> partSizes = tempBuf + 2 * depthBytes; >>>> - if (m_param->rc.cuTree) >>>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>>> >>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, intraPic->depth); >>>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, intraPic->chromaModes); >>>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, intraPic->partSizes); >>>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); } >>>> + if (m_param->rc.cuTree) >>>> + { >>>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, >>>> m_analysisFileIn, cuQPOffSets); >>>> + if (m_param->analysisLoadReuseLevel > 1 && >>>> m_param->analysisLoadReuseLevel < 10) >>>> + memcpy(analysis->intraData->cuQPOff, cuQPBuf, >>>> sizeof(int8_t) * reuseBufSize); >>>> + } >>>> >>>> size_t count = 0; >>>> for (uint32_t d = 0; d < depthBytes; d++) >>>> @@ -4480,7 +4502,7 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> memset(&(analysis->intraData)->depth[count], depthBuf[d], >>>> bytes); >>>> memset(&(analysis->intraData)->chromaModes[count], >>>> modeBuf[d], bytes); >>>> 
memset(&(analysis->intraData)->partSizes[count], >>>> partSizes[d], bytes); >>>> - if (m_param->rc.cuTree) >>>> + if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel >>>> == 10) >>>> memset(&(analysis->intraData)->cuQPOff[count], >>>> cuQPBuf[d], bytes); >>>> count += bytes; >>>> } >>>> @@ -4515,7 +4537,6 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; >>>> MV* mv[2]; >>>> int8_t* refIdx[2]; >>>> - int8_t* cuQPBuf = NULL; >>>> >>>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2; >>>> bool bIntraInInter = false; >>>> @@ -4535,12 +4556,15 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf); >>>> depthBuf = tempBuf; >>>> modeBuf = tempBuf + depthBytes; >>>> - if (m_param->rc.cuTree) >>>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>>> >>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, interPic->depth); >>>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, interPic->modes); >>>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, >>>> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); } >>>> + if (m_param->rc.cuTree) >>>> + { >>>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, >>>> m_analysisFileIn, cuQPOffSets); >>>> + if (m_param->analysisLoadReuseLevel > 1 && >>>> m_param->analysisLoadReuseLevel < 10) >>>> + memcpy(analysis->interData->cuQPOff, cuQPBuf, >>>> sizeof(int8_t) * reuseBufSize); >>>> + } >>>> >>>> if (m_param->analysisLoadReuseLevel > 4) >>>> { >>>> @@ -4578,7 +4602,7 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> depthBuf[d] = 1; >>>> memset(&(analysis->interData)->depth[count], >>>> depthBuf[d], bytes); >>>> memset(&(analysis->interData)->modes[count], >>>> modeBuf[d], bytes); >>>> - if (m_param->rc.cuTree) >>>> + if 
(m_param->rc.cuTree && >>>> m_param->analysisLoadReuseLevel == 10) >>>> memset(&(analysis->interData)->cuQPOff[count], >>>> cuQPBuf[d], bytes); >>>> if (m_param->analysisLoadReuseLevel > 4) >>>> { >>>> @@ -4736,7 +4760,7 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> int numPartitions = analysis->numPartitions; >>>> int numCUsInFrame = analysis->numCUsInFrame; >>>> int numCuInHeight = analysis->numCuInHeight; >>>> - /* Allocate memory for scaled resoultion's numPartitions and >>>> numCUsInFrame*/ >>>> + /* Allocate memory for scaled resolution's numPartitions and >>>> numCUsInFrame */ >>>> analysis->numPartitions = m_param->num4x4Partitions; >>>> analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU; >>>> analysis->numCuInHeight = cuLoc.heightInCU; >>>> @@ -4808,25 +4832,40 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> X265_FREE(vbvCostBuf); >>>> } >>>> >>>> + uint32_t reuseBufSize = 0; >>>> + int8_t *cuQPOffSets = NULL, *cuQPBuf = NULL; >>>> + if (m_param->rc.cuTree) >>>> + { >>>> + if (m_param->analysisLoadReuseLevel == 10) >>>> + reuseBufSize = depthBytes; >>>> + else if (m_param->analysisLoadReuseLevel > 1) >>>> + reuseBufSize = (MAX_NUM_CU_GEOMS / factor) * >>>> (analysis->numCUsInFrame); >>>> + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize); >>>> + if (!m_param->bUseAnalysisFile) >>>> + { >>>> + if (analysis->sliceType == X265_TYPE_IDR || >>>> analysis->sliceType == X265_TYPE_I) >>>> + cuQPOffSets = intraPic->cuQPOff; >>>> + else >>>> + cuQPOffSets = interPic->cuQPOff; >>>> + } >>>> + } >>>> + >>>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == >>>> X265_TYPE_I) >>>> { >>>> if (m_param->analysisLoadReuseLevel < 2) >>>> return; >>>> >>>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >>>> *partSizes = NULL; >>>> - int8_t *cuQPBuf = NULL; >>>> >>>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); >>>> depthBuf = tempBuf; >>>> 
modeBuf = tempBuf + depthBytes; >>>> partSizes = tempBuf + 2 * depthBytes; >>>> - if (m_param->rc.cuTree) >>>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>>> >>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, intraPic->depth); >>>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, intraPic->chromaModes); >>>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, intraPic->partSizes); >>>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); } >>>> + if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>>> reuseBufSize, m_analysisFileIn, cuQPOffSets); } >>>> >>>> uint32_t count = 0; >>>> for (uint32_t d = 0; d < depthBytes; d++) >>>> @@ -4848,7 +4887,7 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> memset(&(analysis->intraData)->depth[count], >>>> depthBuf[d], bytes); >>>> memset(&(analysis->intraData)->chromaModes[count], >>>> modeBuf[d], bytes); >>>> memset(&(analysis->intraData)->partSizes[count], >>>> partSizes[d], bytes); >>>> - if (m_param->rc.cuTree) >>>> + if (m_param->rc.cuTree && >>>> m_param->analysisLoadReuseLevel == 10) >>>> memset(&(analysis->intraData)->cuQPOff[count], >>>> cuQPBuf[d], bytes); >>>> count += bytes; >>>> d += getCUIndex(&cuLoc, &count, bytes, 1); >>>> @@ -4886,7 +4925,6 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; >>>> MV* mv[2]; >>>> int8_t* refIdx[2]; >>>> - int8_t* cuQPBuf = NULL; >>>> >>>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2; >>>> bool bIntraInInter = false; >>>> @@ -4900,12 +4938,16 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf); >>>> depthBuf = tempBuf; >>>> modeBuf = tempBuf + depthBytes; >>>> - if (m_param->rc.cuTree) >>>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>>> >>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, interPic->depth); >>>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>>> m_analysisFileIn, interPic->modes); >>>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>>> depthBytes, m_analysisFileIn, interPic->cuQPOff); } >>>> + if (m_param->rc.cuTree) >>>> + { >>>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, >>>> m_analysisFileIn, cuQPOffSets); >>>> + if (m_param->analysisLoadReuseLevel > 1 && >>>> m_param->analysisLoadReuseLevel < 10) >>>> + memcpy(&(analysis->interData)->cuQPOff, cuQPBuf, >>>> sizeof(int8_t) * reuseBufSize); >>>> + } >>>> + >>>> if (m_param->analysisLoadReuseLevel > 4) >>>> { >>>> partSize = modeBuf + depthBytes; >>>> @@ -4954,7 +4996,7 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> { >>>> memset(&(analysis->interData)->depth[count], >>>> writeDepth, bytes); >>>> memset(&(analysis->interData)->modes[count], >>>> modeBuf[d], bytes); >>>> - if (m_param->rc.cuTree) >>>> + if (m_param->rc.cuTree && >>>> m_param->analysisLoadReuseLevel == 10) >>>> memset(&(analysis->interData)->cuQPOff[count], >>>> cuQPBuf[d], bytes); >>>> if (m_param->analysisLoadReuseLevel == 10 && >>>> bIntraInInter) >>>> memset(&(analysis->intraData)->chromaModes[count], >>>> chromaDir[d], bytes); >>>> @@ -5046,7 +5088,9 @@ void >>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>>> } >>>> } >>>> else >>>> + { >>>> X265_FREAD((analysis->interData)->ref, sizeof(int32_t), >>>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, >>>> 
m_analysisFileIn, interPic->ref); >>>> + } >>>> >>>> consumedBytes += frameRecordSize; >>>> if (numDir == 1) >>>> @@ -5510,9 +5554,10 @@ void >>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>>> analysis->frameRecordSize += analysis->numCUsInFrame * >>>> sizeof(sse_t); >>>> } >>>> >>>> + uint32_t reuseQPBufsize = 0; >>>> if (m_param->analysisSaveReuseLevel > 1) >>>> { >>>> - >>>> + reuseQPBufsize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame; >>>> if (analysis->sliceType == X265_TYPE_IDR || >>>> analysis->sliceType == X265_TYPE_I) >>>> { >>>> for (uint32_t cuAddr = 0; cuAddr < >>>> analysis->numCUsInFrame; cuAddr++) >>>> @@ -5536,12 +5581,21 @@ void >>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>>> partSize = ctu->m_partSize[absPartIdx]; >>>> intraDataCTU->partSizes[depthBytes] = partSize; >>>> >>>> - if (m_param->rc.cuTree) >>>> + if (m_param->rc.cuTree && >>>> m_param->analysisSaveReuseLevel == 10) >>>> intraDataCTU->cuQPOff[depthBytes] = >>>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); >>>> absPartIdx += ctu->m_numPartitions >> (depth * 2); >>>> } >>>> + >>>> + if (m_param->rc.cuTree && >>>> m_param->analysisSaveReuseLevel < 10) >>>> + { >>>> + uint32_t nextCuIdx = (cuAddr + 1) * >>>> MAX_NUM_CU_GEOMS; >>>> + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < >>>> nextCuIdx; i++) >>>> + intraDataCTU->cuQPOff[i] = >>>> (int8_t)(intraDataCTU->cuQPOff[i] - baseQP); >>>> + } >>>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr * >>>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* >>>> ctu->m_numPartitions); >>>> } >>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel >>>> == 10) >>>> + reuseQPBufsize = depthBytes; >>>> } >>>> else >>>> { >>>> @@ -5567,7 +5621,7 @@ void >>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>>> predMode = 4; // used as indicator if the >>>> block is coded as bidir >>>> >>>> interDataCTU->modes[depthBytes] = predMode; 
>>>> - if (m_param->rc.cuTree) >>>> + if (m_param->rc.cuTree && >>>> m_param->analysisSaveReuseLevel == 10) >>>> interDataCTU->cuQPOff[depthBytes] = >>>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); >>>> >>>> if (m_param->analysisSaveReuseLevel > 4) >>>> @@ -5599,13 +5653,23 @@ void >>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>>> } >>>> absPartIdx += ctu->m_numPartitions >> (depth * 2); >>>> } >>>> + >>>> + if (m_param->rc.cuTree && >>>> m_param->analysisSaveReuseLevel < 10) >>>> + { >>>> + uint32_t nextCuIdx = (cuAddr + 1) * >>>> MAX_NUM_CU_GEOMS; >>>> + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < >>>> nextCuIdx ; i++) >>>> + interDataCTU->cuQPOff[i] = >>>> (int8_t)(interDataCTU->cuQPOff[i] - baseQP); >>>> + } >>>> + >>>> if (m_param->analysisSaveReuseLevel == 10 && >>>> bIntraInInter) >>>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr * >>>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* >>>> ctu->m_numPartitions); >>>> } >>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel >>>> == 10) >>>> + reuseQPBufsize = depthBytes; >>>> } >>>> >>>> if ((analysis->sliceType == X265_TYPE_IDR || >>>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree) >>>> - analysis->frameRecordSize += sizeof(uint8_t)* >>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + >>>> (sizeof(int8_t) * depthBytes); >>>> + analysis->frameRecordSize += sizeof(uint8_t)* >>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + >>>> (sizeof(int8_t) * reuseQPBufsize); >>>> else if (analysis->sliceType == X265_TYPE_IDR || >>>> analysis->sliceType == X265_TYPE_I) >>>> analysis->frameRecordSize += sizeof(uint8_t)* >>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3; >>>> else >>>> @@ -5613,7 +5677,8 @@ void >>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>>> /* Add sizeof depth, modes, partSize, cuQPOffset, >>>> mergeFlag */ >>>> 
analysis->frameRecordSize += depthBytes * 2; >>>> if (m_param->rc.cuTree) >>>> - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes); >>>> + analysis->frameRecordSize += (sizeof(int8_t) * >>>> reuseQPBufsize); >>>> + >>>> if (m_param->analysisSaveReuseLevel > 4) >>>> analysis->frameRecordSize += (depthBytes * 2); >>>> >>>> @@ -5669,7 +5734,7 @@ void >>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>>> X265_FWRITE((analysis->intraData)->chromaModes, >>>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>>> X265_FWRITE((analysis->intraData)->partSizes, sizeof(char), >>>> depthBytes, m_analysisFileOut); >>>> if (m_param->rc.cuTree) >>>> - X265_FWRITE((analysis->intraData)->cuQPOff, >>>> sizeof(int8_t), depthBytes, m_analysisFileOut); >>>> + X265_FWRITE((analysis->intraData)->cuQPOff, >>>> sizeof(int8_t), reuseQPBufsize, m_analysisFileOut); >>>> X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t), >>>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut); >>>> } >>>> else >>>> @@ -5677,7 +5742,7 @@ void >>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>>> X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), >>>> depthBytes, m_analysisFileOut); >>>> X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t), >>>> depthBytes, m_analysisFileOut); >>>> if (m_param->rc.cuTree) >>>> - X265_FWRITE((analysis->interData)->cuQPOff, >>>> sizeof(int8_t), depthBytes, m_analysisFileOut); >>>> + X265_FWRITE((analysis->interData)->cuQPOff, >>>> sizeof(int8_t), reuseQPBufsize, m_analysisFileOut); >>>> if (m_param->analysisSaveReuseLevel > 4) >>>> { >>>> X265_FWRITE((analysis->interData)->partSize, >>>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>>> @@ -5762,7 +5827,7 @@ void >>>> Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c >>>> interData->mv[1][depthBytes].word = >>>> ctu->m_mv[1][absPartIdx].word; >>>> interData->mvpIdx[1][depthBytes] = >>>> 
ctu->m_mvpIdx[1][absPartIdx]; >>>> ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx]; >>>> - predMode = 4; // used as indiacator if the block >>>> is coded as bidir >>>> + predMode = 4; // used as indicator if the block is >>>> coded as bidir >>>> } >>>> interData->modes[depthBytes] = predMode; >>>> >>>> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp >>>> index 0adb0d0db..3bc01268b 100644 >>>> --- a/source/encoder/slicetype.cpp >>>> +++ b/source/encoder/slicetype.cpp >>>> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, >>>> bool bKeyframe) >>>> >>>> if (!framecnt) >>>> { >>>> - if (m_param->rc.cuTree) >>>> + if (m_param->rc.cuTree && !m_param->analysisLoad) >>>> >>> [AM] Won't this implicitly turn OFF cutree at reuse-level 1? >>> >>>> cuTree(frames, 0, bKeyframe); >>>> return; >>>> } >>>> diff --git a/source/x265.h b/source/x265.h >>>> index f44040ba7..8d7a75826 100644 >>>> --- a/source/x265.h >>>> +++ b/source/x265.h >>>> @@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data >>>> uint8_t* modes; >>>> char* partSizes; >>>> uint8_t* chromaModes; >>>> - int8_t* cuQPOff; >>>> + int8_t* cuQPOff; >>>> }x265_analysis_intra_data; >>>> >>>> typedef struct x265_analysis_MV >>>> @@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data >>>> uint8_t* interDir; >>>> uint8_t* mvpIdx[2]; >>>> int8_t* refIdx[2]; >>>> - x265_analysis_MV* mv[2]; >>>> + x265_analysis_MV* mv[2]; >>>> int64_t* sadCost; >>>> int8_t* cuQPOff; >>>> }x265_analysis_inter_data; >>>> -- >>>> 2.20.1.windows.1 >>>> >>>> >>>> -- >>>> *With Regards,* >>>> *Srikanth Kurapati.* >>>> _______________________________________________ >>>> x265-devel mailing list >>>> x265-devel@videolan.org >>>> https://mailman.videolan.org/listinfo/x265-devel >>>> >>> >>> >>> -- >>> Regards, >>> *Aruna Matheswaran,* >>> Video Codec Engineer, >>> Media & AI analytics BU, >>> >>> >>> >>> _______________________________________________ >>> x265-devel mailing list >>> 
x265-devel@videolan.org >>> https://mailman.videolan.org/listinfo/x265-devel >>> >> >> >> -- >> *With Regards,* >> *Srikanth Kurapati.* >> > > > -- > *With Regards,* > *Srikanth Kurapati.* > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > -- Regards, *Aruna Matheswaran,* Video Codec Engineer, Media & AI analytics BU,
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel