Adding to my reply above. [AM] Why MAX_NUM_CU_GEOMS combinations?
[KS] Will optimize storage based on min-cu-size configuration. On Thu, Jan 21, 2021 at 4:09 PM Srikanth Kurapati < srikanth.kurap...@multicorewareinc.com> wrote: > > [AM] Can't we share lowres cutree stats generated at qg size granularity? > Why MAX_NUM_CU_GEOMS combinations? > > [KS] If we share like that then we will have to calculate the dqp per cu > at analysis phase just like the save encode and we will not get the savings in > cpu cycles there. Currently we are storing the final dqp derived from > lowres mv costs at qg size granularity by taking the difference between the > final qp and base qp per slice. > > MAX_NUM_CU_GEOMS is 85 = ( 1 + 4 + 16 + 64 ); this is the maximum number of > partitions at which qp can be computed and used in a ctu. > > [AM] Won't this implicitly turn OFF cutree at reuse-level 1? > > [KS] Agreed and addressed. > > > On Tue, Jan 19, 2021 at 11:12 PM Aruna Matheswaran < > ar...@multicorewareinc.com> wrote: > >> >> >> On Mon, Jan 11, 2021 at 8:08 PM Srikanth Kurapati < >> srikanth.kurap...@multicorewareinc.com> wrote: >> >>> From d516d0564888e154d88d89320302725d87bfab78 Mon Sep 17 00:00:00 2001 >>> From: Srikanth Kurapati <srikanth.kurap...@multicorewareinc.com> >>> Date: Wed, 30 Dec 2020 17:00:08 +0530 >>> Subject: [PATCH] fix: corrects output mismatch for cutree enabled >>> analysis >>> save/load encodes with reuse-levels in between 1 to 10 for similar >>> encoder >>> settings.
>>> >>> --- >>> source/abrEncApp.cpp | 14 +++- >>> source/common/common.h | 3 +- >>> source/common/cudata.h | 2 +- >>> source/encoder/analysis.cpp | 31 ++++++++- >>> source/encoder/analysis.h | 1 + >>> source/encoder/api.cpp | 28 +++++++- >>> source/encoder/encoder.cpp | 123 ++++++++++++++++++++++++++--------- >>> source/encoder/slicetype.cpp | 2 +- >>> source/x265.h | 4 +- >>> 9 files changed, 166 insertions(+), 42 deletions(-) >>> >>> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp >>> index fa62ebf63..ea255e3f6 100644 >>> --- a/source/abrEncApp.cpp >>> +++ b/source/abrEncApp.cpp >>> @@ -340,7 +340,12 @@ namespace X265_NS { >>> memcpy(intraDst->partSizes, intraSrc->partSizes, >>> sizeof(char) * src->depthBytes); >>> memcpy(intraDst->chromaModes, intraSrc->chromaModes, >>> sizeof(uint8_t) * src->depthBytes); >>> if (m_param->rc.cuTree) >>> - memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, >>> sizeof(int8_t) * src->depthBytes); >>> + { >>> + if (m_param->analysisSaveReuseLevel == 10) >>> + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, >>> sizeof(int8_t) * src->depthBytes); >>> + else >>> + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, >>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS)); >>> + } >>> } >>> else >>> { >>> @@ -355,7 +360,12 @@ namespace X265_NS { >>> memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * >>> src->depthBytes); >>> memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * >>> src->depthBytes); >>> if (m_param->rc.cuTree) >>> - memcpy(interDst->cuQPOff, interSrc->cuQPOff, >>> sizeof(int8_t) * src->depthBytes); >>> + { >>> + if (m_param->analysisReuseLevel == 10) >>> + memcpy(interDst->cuQPOff, interSrc->cuQPOff, >>> sizeof(int8_t) * src->depthBytes); >>> + else >>> + memcpy(interDst->cuQPOff, interSrc->cuQPOff, >>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS)); >>> + } >>> if (m_param->analysisSaveReuseLevel > 4) >>> { >>> memcpy(interDst->partSize, interSrc->partSize, >>> sizeof(uint8_t) * src->depthBytes); 
>>> diff --git a/source/common/common.h b/source/common/common.h >>> index 8c06cd79e..0ffbf17eb 100644 >>> --- a/source/common/common.h >>> +++ b/source/common/common.h >>> @@ -326,7 +326,8 @@ typedef int16_t coeff_t; // transform >>> coefficient >>> >>> #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422) >>> #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420) >>> -#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8 >>> +#define MAX_NUM_CU_GEOMS 85 >>> +#define X265_MAX_PRED_MODE_PER_CTU MAX_NUM_CU_GEOMS * 2 * 8 >>> >>> #define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // >>> Maximum number of transform coefficients, for a 32x32 transform >>> #define MAX_NUM_TR_CATEGORIES 16 // 32, >>> 16, 8, 4 transform categories each for luma and chroma >>> diff --git a/source/common/cudata.h b/source/common/cudata.h >>> index 8397f0568..c7d9a1972 100644 >>> --- a/source/common/cudata.h >>> +++ b/source/common/cudata.h >>> @@ -371,7 +371,7 @@ struct CUDataMemPool >>> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) * >>> numInstances); >>> } >>> else >>> - { >>> + { >>> uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + >>> CHROMA_V_SHIFT(csp)); >>> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * >>> 2) * numInstances); >>> } >>> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp >>> index aabf386ca..22a4ba74f 100644 >>> --- a/source/encoder/analysis.cpp >>> +++ b/source/encoder/analysis.cpp >>> @@ -220,6 +220,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& >>> frame, const CUGeom& cuGeom, con >>> if (m_param->analysisSave && !m_param->analysisLoad) >>> for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * >>> numPredDir; i++) >>> m_reuseRef[i] = -1; >>> + >>> + if (m_param->rc.cuTree) >>> + m_reuseQP = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr * >>> MAX_NUM_CU_GEOMS]; >>> } >>> ProfileCUScope(ctu, totalCTUTime, totalCTUs); >>> >>> @@ -233,6 +236,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& >>> frame, const CUGeom& cuGeom, con >>> 
memcpy(ctu.m_partSize, >>> &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) * >>> numPartition); >>> memcpy(ctu.m_chromaIntraDir, >>> &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * >>> numPartition); >>> } >>> + if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10) >>> + m_reuseQP = &intraDataCTU->cuQPOff[ctu.m_cuAddr * >>> MAX_NUM_CU_GEOMS]; >>> compressIntraCU(ctu, cuGeom, qp); >>> } >>> else >>> @@ -520,6 +525,9 @@ uint64_t Analysis::compressIntraCU(const CUData& >>> parentCTU, const CUGeom& cuGeom >>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF); >>> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); >>> >>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && >>> m_param->analysisSaveReuseLevel < 10) >>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; >>> >> + >>> bool bAlreadyDecided = m_param->intraRefine != 4 && >>> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX && >>> !(m_param->bAnalysisType == HEVC_INFO); >>> bool bDecidedDepth = m_param->intraRefine != 4 && >>> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth; >>> int split = 0; >>> @@ -870,6 +878,9 @@ uint32_t Analysis::compressInterCU_dist(const >>> CUData& parentCTU, const CUGeom& c >>> uint32_t minDepth = m_param->rdLevel <= 4 ? 
>>> topSkipMinDepth(parentCTU, cuGeom) : 0; >>> uint32_t splitRefs[4] = { 0, 0, 0, 0 }; >>> >>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && >>> m_param->analysisSaveReuseLevel < 10) >>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; >>> + >>> X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not >>> support RD 0 or 1\n"); >>> >>> PMODE pmode(*this, cuGeom); >>> @@ -1152,6 +1163,8 @@ SplitData Analysis::compressInterCU_rd0_4(const >>> CUData& parentCTU, const CUGeom& >>> uint32_t cuAddr = parentCTU.m_cuAddr; >>> ModeDepth& md = m_modeDepth[depth]; >>> >>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && >>> m_param->analysisSaveReuseLevel < 10) >>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; >>> >>> if (m_param->searchMethod == X265_SEA) >>> { >>> @@ -1856,6 +1869,9 @@ SplitData Analysis::compressInterCU_rd5_6(const >>> CUData& parentCTU, const CUGeom& >>> ModeDepth& md = m_modeDepth[depth]; >>> md.bestMode = NULL; >>> >>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && >>> m_param->analysisSaveReuseLevel < 10) >>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; >>> + >>> if (m_param->searchMethod == X265_SEA) >>> { >>> int numPredDir = m_slice->isInterP() ? 
1 : 2; >>> @@ -3647,11 +3663,20 @@ int Analysis::calculateQpforCuSize(const CUData& >>> ctu, const CUGeom& cuGeom, int3 >>> >>> if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree) >>> { >>> - int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + >>> cuGeom.absPartIdx; >>> + int cuIdx; >>> + int8_t cuQPOffSet = 0; >>> + >>> + if (m_param->scaleFactor == 2 || >>> m_param->analysisLoadReuseLevel == 10) >>> + cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + >>> cuGeom.absPartIdx; >>> + else >>> + cuIdx = (ctu.m_cuAddr * MAX_NUM_CU_GEOMS) + >>> cuGeom.geomRecurId; >>> + >>> if (ctu.m_slice->m_sliceType == I_SLICE) >>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >>> (int32_t)(qp + 0.5 + >>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx])); >>> + cuQPOffSet = >>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]; >>> else >>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >>> (int32_t)(qp + 0.5 + >>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx])); >>> + cuQPOffSet = >>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]; >>> + >>> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, >>> (int32_t)(qp + 0.5 + cuQPOffSet)); >>> } >>> if (m_param->rc.hevcAq) >>> { >>> diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h >>> index 3bcb56bc3..8d76d5c5e 100644 >>> --- a/source/encoder/analysis.h >>> +++ b/source/encoder/analysis.h >>> @@ -126,6 +126,7 @@ protected: >>> int32_t* m_reuseRef; >>> uint8_t* m_reuseDepth; >>> uint8_t* m_reuseModes; >>> + int8_t * m_reuseQP; // array of QP values for >>> analysis reuse at reuse levels > 1 and < 10 when cutree is enabled >>> uint8_t* m_reusePartSize; >>> uint8_t* m_reuseMergeFlag; >>> x265_analysis_MV* m_reuseMv[2]; >>> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp >>> index a986355e0..2c90fe8f2 100644 >>> --- a/source/encoder/api.cpp >>> +++ 
b/source/encoder/api.cpp >>> @@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param, >>> x265_analysis_data* analysis) >>> CHECKED_MALLOC_ZERO(intraData->partSizes, char, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> if (param->rc.cuTree) >>> - CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + { >>> + if (maxReuseLevel == 10) >>> + { >>> + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + } >>> + else >>> + { >>> + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, >>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame); >>> + } >>> + } >>> } >>> analysis->intraData = intraData; >>> >>> @@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param, >>> x265_analysis_data* analysis) >>> CHECKED_MALLOC_ZERO(interData->modes, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> >>> if (param->rc.cuTree && !isMultiPassOpt) >>> - CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + { >>> + if (maxReuseLevel == 10) >>> + { >>> + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + } >>> + else >>> + { >>> + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, >>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame); >>> >> [AM] Can't we share lowres cutree stats generated at qg size granularity? >> Why MAX_NUM_CU_GEOMS combinations? 
>> >>> + } >>> + } >>> CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> @@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param, >>> x265_analysis_data* analysis) >>> X265_FREE((analysis->intraData)->partSizes); >>> X265_FREE((analysis->intraData)->chromaModes); >>> if (param->rc.cuTree) >>> - X265_FREE((analysis->intraData)->cuQPOff); >>> + { >>> + X265_FREE_ZERO((analysis->intraData)->cuQPOff); >>> + } >>> } >>> X265_FREE(analysis->intraData); >>> analysis->intraData = NULL; >>> @@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param, >>> x265_analysis_data* analysis) >>> X265_FREE((analysis->interData)->depth); >>> X265_FREE((analysis->interData)->modes); >>> if (!isMultiPassOpt && param->rc.cuTree) >>> + { >>> X265_FREE((analysis->interData)->cuQPOff); >>> + } >>> X265_FREE((analysis->interData)->mvpIdx[0]); >>> X265_FREE((analysis->interData)->mvpIdx[1]); >>> X265_FREE((analysis->interData)->mv[0]); >>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp >>> index 1f710e1ce..5eb123d31 100644 >>> --- a/source/encoder/encoder.cpp >>> +++ b/source/encoder/encoder.cpp >>> @@ -4444,6 +4444,26 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> } >>> } >>> } >>> + >>> + int8_t *cuQPBuf = NULL, *cuQPOffSets = NULL; >>> + uint32_t reuseBufSize = 0; >>> + >>> + if (m_param->rc.cuTree) >>> + { >>> + if (m_param->analysisLoadReuseLevel == 10) >>> + reuseBufSize = depthBytes; >>> + else if (m_param->analysisLoadReuseLevel > 1) >>> + reuseBufSize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame; >>> + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize); >>> + if (!m_param->bUseAnalysisFile) >>> + { >>> + if 
(analysis->sliceType == X265_TYPE_IDR || >>> analysis->sliceType == X265_TYPE_I) >>> + cuQPOffSets = intraPic->cuQPOff; >>> + else >>> + cuQPOffSets = interPic->cuQPOff; >>> + } >>> + } >>> + >>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == >>> X265_TYPE_I) >>> { >>> if (m_param->bAnalysisType == HEVC_INFO) >>> @@ -4452,19 +4472,21 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> return; >>> >>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >>> *partSizes = NULL; >>> - int8_t *cuQPBuf = NULL; >>> >>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); >>> depthBuf = tempBuf; >>> modeBuf = tempBuf + depthBytes; >>> partSizes = tempBuf + 2 * depthBytes; >>> - if (m_param->rc.cuTree) >>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>> >>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->depth); >>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->chromaModes); >>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->partSizes); >>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); } >>> + if (m_param->rc.cuTree) >>> + { >>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, >>> m_analysisFileIn, cuQPOffSets); >>> + if (m_param->analysisLoadReuseLevel > 1 && >>> m_param->analysisLoadReuseLevel < 10) >>> + memcpy(analysis->intraData->cuQPOff, cuQPBuf, >>> sizeof(int8_t) * reuseBufSize); >>> + } >>> >>> size_t count = 0; >>> for (uint32_t d = 0; d < depthBytes; d++) >>> @@ -4480,7 +4502,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> memset(&(analysis->intraData)->depth[count], depthBuf[d], >>> bytes); >>> memset(&(analysis->intraData)->chromaModes[count], >>> modeBuf[d], bytes); >>> memset(&(analysis->intraData)->partSizes[count], >>> partSizes[d], bytes); >>> - if (m_param->rc.cuTree) >>> 
+ if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel >>> == 10) >>> memset(&(analysis->intraData)->cuQPOff[count], >>> cuQPBuf[d], bytes); >>> count += bytes; >>> } >>> @@ -4515,7 +4537,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; >>> MV* mv[2]; >>> int8_t* refIdx[2]; >>> - int8_t* cuQPBuf = NULL; >>> >>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2; >>> bool bIntraInInter = false; >>> @@ -4535,12 +4556,15 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf); >>> depthBuf = tempBuf; >>> modeBuf = tempBuf + depthBytes; >>> - if (m_param->rc.cuTree) >>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>> >>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->depth); >>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->modes); >>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, >>> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); } >>> + if (m_param->rc.cuTree) >>> + { >>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, >>> m_analysisFileIn, cuQPOffSets); >>> + if (m_param->analysisLoadReuseLevel > 1 && >>> m_param->analysisLoadReuseLevel < 10) >>> + memcpy(analysis->interData->cuQPOff, cuQPBuf, >>> sizeof(int8_t) * reuseBufSize); >>> + } >>> >>> if (m_param->analysisLoadReuseLevel > 4) >>> { >>> @@ -4578,7 +4602,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> depthBuf[d] = 1; >>> memset(&(analysis->interData)->depth[count], >>> depthBuf[d], bytes); >>> memset(&(analysis->interData)->modes[count], >>> modeBuf[d], bytes); >>> - if (m_param->rc.cuTree) >>> + if (m_param->rc.cuTree && >>> m_param->analysisLoadReuseLevel == 10) >>> memset(&(analysis->interData)->cuQPOff[count], >>> cuQPBuf[d], bytes); >>> if 
(m_param->analysisLoadReuseLevel > 4) >>> { >>> @@ -4736,7 +4760,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> int numPartitions = analysis->numPartitions; >>> int numCUsInFrame = analysis->numCUsInFrame; >>> int numCuInHeight = analysis->numCuInHeight; >>> - /* Allocate memory for scaled resoultion's numPartitions and >>> numCUsInFrame*/ >>> + /* Allocate memory for scaled resolution's numPartitions and >>> numCUsInFrame */ >>> analysis->numPartitions = m_param->num4x4Partitions; >>> analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU; >>> analysis->numCuInHeight = cuLoc.heightInCU; >>> @@ -4808,25 +4832,40 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> X265_FREE(vbvCostBuf); >>> } >>> >>> + uint32_t reuseBufSize = 0; >>> + int8_t *cuQPOffSets = NULL, *cuQPBuf = NULL; >>> + if (m_param->rc.cuTree) >>> + { >>> + if (m_param->analysisLoadReuseLevel == 10) >>> + reuseBufSize = depthBytes; >>> + else if (m_param->analysisLoadReuseLevel > 1) >>> + reuseBufSize = (MAX_NUM_CU_GEOMS / factor) * >>> (analysis->numCUsInFrame); >>> + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize); >>> + if (!m_param->bUseAnalysisFile) >>> + { >>> + if (analysis->sliceType == X265_TYPE_IDR || >>> analysis->sliceType == X265_TYPE_I) >>> + cuQPOffSets = intraPic->cuQPOff; >>> + else >>> + cuQPOffSets = interPic->cuQPOff; >>> + } >>> + } >>> + >>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == >>> X265_TYPE_I) >>> { >>> if (m_param->analysisLoadReuseLevel < 2) >>> return; >>> >>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >>> *partSizes = NULL; >>> - int8_t *cuQPBuf = NULL; >>> >>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); >>> depthBuf = tempBuf; >>> modeBuf = tempBuf + depthBytes; >>> partSizes = tempBuf + 2 * depthBytes; >>> - if (m_param->rc.cuTree) >>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>> >>> X265_FREAD(depthBuf, sizeof(uint8_t), 
depthBytes, >>> m_analysisFileIn, intraPic->depth); >>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->chromaModes); >>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->partSizes); >>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); } >>> + if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>> reuseBufSize, m_analysisFileIn, cuQPOffSets); } >>> >>> uint32_t count = 0; >>> for (uint32_t d = 0; d < depthBytes; d++) >>> @@ -4848,7 +4887,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> memset(&(analysis->intraData)->depth[count], >>> depthBuf[d], bytes); >>> memset(&(analysis->intraData)->chromaModes[count], >>> modeBuf[d], bytes); >>> memset(&(analysis->intraData)->partSizes[count], >>> partSizes[d], bytes); >>> - if (m_param->rc.cuTree) >>> + if (m_param->rc.cuTree && >>> m_param->analysisLoadReuseLevel == 10) >>> memset(&(analysis->intraData)->cuQPOff[count], >>> cuQPBuf[d], bytes); >>> count += bytes; >>> d += getCUIndex(&cuLoc, &count, bytes, 1); >>> @@ -4886,7 +4925,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; >>> MV* mv[2]; >>> int8_t* refIdx[2]; >>> - int8_t* cuQPBuf = NULL; >>> >>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2; >>> bool bIntraInInter = false; >>> @@ -4900,12 +4938,16 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf); >>> depthBuf = tempBuf; >>> modeBuf = tempBuf + depthBytes; >>> - if (m_param->rc.cuTree) >>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>> >>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->depth); >>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->modes); >>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>> depthBytes, m_analysisFileIn, interPic->cuQPOff); } >>> + if (m_param->rc.cuTree) >>> + { >>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, >>> m_analysisFileIn, cuQPOffSets); >>> + if (m_param->analysisLoadReuseLevel > 1 && >>> m_param->analysisLoadReuseLevel < 10) >>> + memcpy(&(analysis->interData)->cuQPOff, cuQPBuf, >>> sizeof(int8_t) * reuseBufSize); >>> + } >>> + >>> if (m_param->analysisLoadReuseLevel > 4) >>> { >>> partSize = modeBuf + depthBytes; >>> @@ -4954,7 +4996,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> { >>> memset(&(analysis->interData)->depth[count], >>> writeDepth, bytes); >>> memset(&(analysis->interData)->modes[count], >>> modeBuf[d], bytes); >>> - if (m_param->rc.cuTree) >>> + if (m_param->rc.cuTree && >>> m_param->analysisLoadReuseLevel == 10) >>> memset(&(analysis->interData)->cuQPOff[count], >>> cuQPBuf[d], bytes); >>> if (m_param->analysisLoadReuseLevel == 10 && >>> bIntraInInter) >>> memset(&(analysis->intraData)->chromaModes[count], >>> chromaDir[d], bytes); >>> @@ -5046,7 +5088,9 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> } >>> } >>> else >>> + { >>> X265_FREAD((analysis->interData)->ref, sizeof(int32_t), >>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, >>> m_analysisFileIn, interPic->ref); >>> + } >>> >>> 
consumedBytes += frameRecordSize; >>> if (numDir == 1) >>> @@ -5510,9 +5554,10 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> analysis->frameRecordSize += analysis->numCUsInFrame * >>> sizeof(sse_t); >>> } >>> >>> + uint32_t reuseQPBufsize = 0; >>> if (m_param->analysisSaveReuseLevel > 1) >>> { >>> - >>> + reuseQPBufsize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame; >>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType >>> == X265_TYPE_I) >>> { >>> for (uint32_t cuAddr = 0; cuAddr < analysis->numCUsInFrame; >>> cuAddr++) >>> @@ -5536,12 +5581,21 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> partSize = ctu->m_partSize[absPartIdx]; >>> intraDataCTU->partSizes[depthBytes] = partSize; >>> >>> - if (m_param->rc.cuTree) >>> + if (m_param->rc.cuTree && >>> m_param->analysisSaveReuseLevel == 10) >>> intraDataCTU->cuQPOff[depthBytes] = >>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); >>> absPartIdx += ctu->m_numPartitions >> (depth * 2); >>> } >>> + >>> + if (m_param->rc.cuTree && >>> m_param->analysisSaveReuseLevel < 10) >>> + { >>> + uint32_t nextCuIdx = (cuAddr + 1) * >>> MAX_NUM_CU_GEOMS; >>> + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < >>> nextCuIdx; i++) >>> + intraDataCTU->cuQPOff[i] = >>> (int8_t)(intraDataCTU->cuQPOff[i] - baseQP); >>> + } >>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr * >>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* >>> ctu->m_numPartitions); >>> } >>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel >>> == 10) >>> + reuseQPBufsize = depthBytes; >>> } >>> else >>> { >>> @@ -5567,7 +5621,7 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> predMode = 4; // used as indicator if the block >>> is coded as bidir >>> >>> interDataCTU->modes[depthBytes] = predMode; >>> - if (m_param->rc.cuTree) >>> + if (m_param->rc.cuTree && >>> m_param->analysisSaveReuseLevel == 10) >>> 
interDataCTU->cuQPOff[depthBytes] = >>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); >>> >>> if (m_param->analysisSaveReuseLevel > 4) >>> @@ -5599,13 +5653,23 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> } >>> absPartIdx += ctu->m_numPartitions >> (depth * 2); >>> } >>> + >>> + if (m_param->rc.cuTree && >>> m_param->analysisSaveReuseLevel < 10) >>> + { >>> + uint32_t nextCuIdx = (cuAddr + 1) * >>> MAX_NUM_CU_GEOMS; >>> + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < >>> nextCuIdx ; i++) >>> + interDataCTU->cuQPOff[i] = >>> (int8_t)(interDataCTU->cuQPOff[i] - baseQP); >>> + } >>> + >>> if (m_param->analysisSaveReuseLevel == 10 && >>> bIntraInInter) >>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr * >>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* >>> ctu->m_numPartitions); >>> } >>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel >>> == 10) >>> + reuseQPBufsize = depthBytes; >>> } >>> >>> if ((analysis->sliceType == X265_TYPE_IDR || >>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree) >>> - analysis->frameRecordSize += sizeof(uint8_t)* >>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + >>> (sizeof(int8_t) * depthBytes); >>> + analysis->frameRecordSize += sizeof(uint8_t)* >>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + >>> (sizeof(int8_t) * reuseQPBufsize); >>> else if (analysis->sliceType == X265_TYPE_IDR || >>> analysis->sliceType == X265_TYPE_I) >>> analysis->frameRecordSize += sizeof(uint8_t)* >>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3; >>> else >>> @@ -5613,7 +5677,8 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag >>> */ >>> analysis->frameRecordSize += depthBytes * 2; >>> if (m_param->rc.cuTree) >>> - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes); >>> + analysis->frameRecordSize += 
(sizeof(int8_t) * >>> reuseQPBufsize); >>> + >>> if (m_param->analysisSaveReuseLevel > 4) >>> analysis->frameRecordSize += (depthBytes * 2); >>> >>> @@ -5669,7 +5734,7 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> X265_FWRITE((analysis->intraData)->chromaModes, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> X265_FWRITE((analysis->intraData)->partSizes, sizeof(char), >>> depthBytes, m_analysisFileOut); >>> if (m_param->rc.cuTree) >>> - X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), >>> depthBytes, m_analysisFileOut); >>> + X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), >>> reuseQPBufsize, m_analysisFileOut); >>> X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t), >>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut); >>> } >>> else >>> @@ -5677,7 +5742,7 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> if (m_param->rc.cuTree) >>> - X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), >>> depthBytes, m_analysisFileOut); >>> + X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), >>> reuseQPBufsize, m_analysisFileOut); >>> if (m_param->analysisSaveReuseLevel > 4) >>> { >>> X265_FWRITE((analysis->interData)->partSize, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> @@ -5762,7 +5827,7 @@ void >>> Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c >>> interData->mv[1][depthBytes].word = >>> ctu->m_mv[1][absPartIdx].word; >>> interData->mvpIdx[1][depthBytes] = >>> ctu->m_mvpIdx[1][absPartIdx]; >>> ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx]; >>> - predMode = 4; // used as indiacator if the block is >>> coded as bidir >>> + predMode = 4; // used as indicator if the block is >>> 
coded as bidir >>> } >>> interData->modes[depthBytes] = predMode; >>> >>> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp >>> index 0adb0d0db..3bc01268b 100644 >>> --- a/source/encoder/slicetype.cpp >>> +++ b/source/encoder/slicetype.cpp >>> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, >>> bool bKeyframe) >>> >>> if (!framecnt) >>> { >>> - if (m_param->rc.cuTree) >>> + if (m_param->rc.cuTree && !m_param->analysisLoad) >>> >> [AM] Won't this implicitly turn OFF cutree at reuse-level 1? >> >>> cuTree(frames, 0, bKeyframe); >>> return; >>> } >>> diff --git a/source/x265.h b/source/x265.h >>> index f44040ba7..8d7a75826 100644 >>> --- a/source/x265.h >>> +++ b/source/x265.h >>> @@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data >>> uint8_t* modes; >>> char* partSizes; >>> uint8_t* chromaModes; >>> - int8_t* cuQPOff; >>> + int8_t* cuQPOff; >>> }x265_analysis_intra_data; >>> >>> typedef struct x265_analysis_MV >>> @@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data >>> uint8_t* interDir; >>> uint8_t* mvpIdx[2]; >>> int8_t* refIdx[2]; >>> - x265_analysis_MV* mv[2]; >>> + x265_analysis_MV* mv[2]; >>> int64_t* sadCost; >>> int8_t* cuQPOff; >>> }x265_analysis_inter_data; >>> -- >>> 2.20.1.windows.1 >>> >>> >>> -- >>> *With Regards,* >>> *Srikanth Kurapati.* >>> _______________________________________________ >>> x265-devel mailing list >>> x265-devel@videolan.org >>> https://mailman.videolan.org/listinfo/x265-devel >>> >> >> >> -- >> Regards, >> *Aruna Matheswaran,* >> Video Codec Engineer, >> Media & AI analytics BU, >> >> >> >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel >> > > > -- > *With Regards,* > *Srikanth Kurapati.* > -- *With Regards,* *Srikanth Kurapati.*
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel