On Wed, Sep 2, 2020 at 6:53 PM Mahesh Pittala <mah...@multicorewareinc.com> wrote:
> > > On Tue, Sep 1, 2020 at 7:09 PM Aruna Matheswaran < > ar...@multicorewareinc.com> wrote: > >> >> >> On Mon, Aug 31, 2020 at 7:38 PM Mahesh Pittala < >> mah...@multicorewareinc.com> wrote: >> >>> From 812cccaa864da47dcf9567dccb81c9fef220c626 Mon Sep 17 00:00:00 2001 >>> From: maheshpittala <mah...@multicorewareinc.com> >>> Date: Sat, 1 Aug 2020 14:52:48 +0530 >>> Subject: [PATCH 1/1] Add support for reusing cutree offsets in all reuse >>> levels 1,2,5 >>> >>> Depth information required to load cutree offsets so writing depth >>> information >>> but not using in reuse level 1 >>> >> [AM] Will this restrict reuse-level 1 from sharing analysis info across >> non-dyadic resolutions? >> > No > [AM] I don't see the logic for cutree sharing between non dyadic resolutions either. Am I missing anything? > --- >>> source/abrEncApp.cpp | 23 ++-- >>> source/encoder/analysis.cpp | 2 +- >>> source/encoder/api.cpp | 53 +++++--- >>> source/encoder/encoder.cpp | 239 +++++++++++++++++++++--------------- >>> 4 files changed, 188 insertions(+), 129 deletions(-) >>> >>> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp >>> index fa62ebf63..e26d3adfc 100644 >>> --- a/source/abrEncApp.cpp >>> +++ b/source/abrEncApp.cpp >>> @@ -330,15 +330,16 @@ namespace X265_NS { >>> >>> if (src->sliceType == X265_TYPE_IDR || src->sliceType == >>> X265_TYPE_I) >>> { >>> - if (m_param->analysisSaveReuseLevel < 2) >>> - goto ret; >>> x265_analysis_intra_data *intraDst, *intraSrc; >>> intraDst = >>> (x265_analysis_intra_data*)m_analysisInfo->intraData; >>> intraSrc = (x265_analysis_intra_data*)src->intraData; >>> - memcpy(intraDst->depth, intraSrc->depth, sizeof(uint8_t) * >>> src->depthBytes); >>> - memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) * >>> src->numCUsInFrame * src->numPartitions); >>> - memcpy(intraDst->partSizes, intraSrc->partSizes, >>> sizeof(char) * src->depthBytes); >>> - memcpy(intraDst->chromaModes, intraSrc->chromaModes, >>> sizeof(uint8_t) * src->depthBytes); >>> 
+ memcpy(intraDst->depth, intraSrc->depth, sizeof(uint8_t)* >>> src->depthBytes); >>> + if (m_param->analysisSaveReuseLevel > 1) >>> + { >>> + memcpy(intraDst->modes, intraSrc->modes, >>> sizeof(uint8_t)* src->numCUsInFrame * src->numPartitions); >>> + memcpy(intraDst->partSizes, intraSrc->partSizes, >>> sizeof(char)* src->depthBytes); >>> + memcpy(intraDst->chromaModes, intraSrc->chromaModes, >>> sizeof(uint8_t)* src->depthBytes); >>> + } >>> if (m_param->rc.cuTree) >>> memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, >>> sizeof(int8_t) * src->depthBytes); >>> } >>> @@ -347,13 +348,12 @@ namespace X265_NS { >>> bool bIntraInInter = (src->sliceType == X265_TYPE_P || >>> m_param->bIntraInBFrames); >>> int numDir = src->sliceType == X265_TYPE_P ? 1 : 2; >>> memcpy(m_analysisInfo->wt, src->wt, sizeof(WeightParam) * 3 >>> * numDir); >>> - if (m_param->analysisSaveReuseLevel < 2) >>> - goto ret; >>> x265_analysis_inter_data *interDst, *interSrc; >>> interDst = >>> (x265_analysis_inter_data*)m_analysisInfo->interData; >>> interSrc = (x265_analysis_inter_data*)src->interData; >>> - memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * >>> src->depthBytes); >>> - memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * >>> src->depthBytes); >>> + memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t)* >>> src->depthBytes); >>> + if (m_param->analysisSaveReuseLevel > 1) >>> + memcpy(interDst->modes, interSrc->modes, >>> sizeof(uint8_t)* src->depthBytes); >>> if (m_param->rc.cuTree) >>> memcpy(interDst->cuQPOff, interSrc->cuQPOff, >>> sizeof(int8_t) * src->depthBytes); >>> if (m_param->analysisSaveReuseLevel > 4) >>> @@ -378,11 +378,10 @@ namespace X265_NS { >>> } >>> } >>> } >>> - if (m_param->analysisSaveReuseLevel != 10) >>> + if (m_param->analysisSaveReuseLevel != 10 && >>> m_param->analysisSaveReuseLevel > 1) >>> memcpy(interDst->ref, interSrc->ref, sizeof(int32_t) * >>> src->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir); >>> } >>> >>> -ret: >>> //increment 
analysis Write counter >>> m_parent->m_analysisWriteCnt[m_id].incr(); >>> m_parent->m_analysisWrite[m_id][index].incr(); >>> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp >>> index 157bae8cd..351d8a3c8 100644 >>> --- a/source/encoder/analysis.cpp >>> +++ b/source/encoder/analysis.cpp >>> @@ -3645,7 +3645,7 @@ int Analysis::calculateQpforCuSize(const CUData& >>> ctu, const CUGeom& cuGeom, int3 >>> qp += distortionData->offset[ctu.m_cuAddr]; >>> } >>> >>> - if (m_param->analysisLoadReuseLevel == 10 && m_param->rc.cuTree) >>> + if (m_param->analysisLoadReuseLevel >= 1 && m_param->rc.cuTree) >>> >> [AM] Shall replace this check with "if (m_param->analysisLoadReuseLevel >> && m_param->rc.cuTree)" >> > I will change it > >> { >>> int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + >>> cuGeom.absPartIdx; >>> if (ctu.m_slice->m_sliceType == I_SLICE) >>> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp >>> index a986355e0..4ef3f04bc 100644 >>> --- a/source/encoder/api.cpp >>> +++ b/source/encoder/api.cpp >>> @@ -813,35 +813,41 @@ void x265_alloc_analysis_data(x265_param *param, >>> x265_analysis_data* analysis) >>> CHECKED_MALLOC_ZERO(analysis->wt, x265_weight_param, numPlanes >>> * numDir); >>> >>> //Allocate memory for intraData pointer >>> - if ((maxReuseLevel > 1) || isMultiPassOpt) >>> + if ((maxReuseLevel > 0) || isMultiPassOpt) >>> { >>> CHECKED_MALLOC_ZERO(intraData, x265_analysis_intra_data, 1); >>> CHECKED_MALLOC(intraData->depth, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> } >>> >> [AM] If cutree offsets and CU depths are shared in all the reuse levels, >> can we change the order of analysis data write/read such that CU depth and >> cutree write/read happens in the common section? By doing this you can >> eliminate the "reuse-level > 0" check all over the code. >> > CU depths are shared from reuse-level 2 in code, no info about CU depths in > doc. 
I have enabled cutree offsets sharing in all reuse levels, > [AM] Sorry, I don't get your point. Could you please elaborate? > >>> - if (maxReuseLevel > 1) >>> + if (maxReuseLevel > 0) >>> { >>> - CHECKED_MALLOC_ZERO(intraData->modes, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> - CHECKED_MALLOC_ZERO(intraData->partSizes, char, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> - CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + if (maxReuseLevel > 1) >>> + { >>> + CHECKED_MALLOC_ZERO(intraData->modes, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + CHECKED_MALLOC_ZERO(intraData->partSizes, char, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + } >>> if (param->rc.cuTree) >>> CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> } >>> analysis->intraData = intraData; >>> >>> - if ((maxReuseLevel > 1) || isMultiPassOpt) >>> + if ((maxReuseLevel > 0) || isMultiPassOpt) >>> { >>> //Allocate memory for interData pointer based on ReuseLevels >>> CHECKED_MALLOC_ZERO(interData, x265_analysis_inter_data, 1); >>> CHECKED_MALLOC(interData->depth, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> - CHECKED_MALLOC_ZERO(interData->modes, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> - >>> if (param->rc.cuTree && !isMultiPassOpt) >>> CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> - CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> - CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> - CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV, >>> 
analysis->numPartitions * analysis->numCUsInFrame); >>> - CHECKED_MALLOC_ZERO(interData->mv[1], x265_analysis_MV, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + >>> + if (maxReuseLevel > 1) >>> + { >>> + CHECKED_MALLOC_ZERO(interData->modes, uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + CHECKED_MALLOC_ZERO(interData->mv[1], x265_analysis_MV, >>> analysis->numPartitions * analysis->numCUsInFrame); >>> + } >>> } >>> >>> if (maxReuseLevel > 4) >>> @@ -915,9 +921,12 @@ void x265_free_analysis_data(x265_param *param, >>> x265_analysis_data* analysis) >>> X265_FREE((analysis->intraData)->depth); >>> if (!isMultiPassOpt) >>> { >>> - X265_FREE((analysis->intraData)->modes); >>> - X265_FREE((analysis->intraData)->partSizes); >>> - X265_FREE((analysis->intraData)->chromaModes); >>> + if (maxReuseLevel > 1) >>> + { >>> + X265_FREE((analysis->intraData)->modes); >>> + X265_FREE((analysis->intraData)->partSizes); >>> + X265_FREE((analysis->intraData)->chromaModes); >>> + } >>> if (param->rc.cuTree) >>> X265_FREE((analysis->intraData)->cuQPOff); >>> } >>> @@ -929,13 +938,17 @@ void x265_free_analysis_data(x265_param *param, >>> x265_analysis_data* analysis) >>> if (analysis->interData) >>> { >>> X265_FREE((analysis->interData)->depth); >>> - X265_FREE((analysis->interData)->modes); >>> if (!isMultiPassOpt && param->rc.cuTree) >>> X265_FREE((analysis->interData)->cuQPOff); >>> - X265_FREE((analysis->interData)->mvpIdx[0]); >>> - X265_FREE((analysis->interData)->mvpIdx[1]); >>> - X265_FREE((analysis->interData)->mv[0]); >>> - X265_FREE((analysis->interData)->mv[1]); >>> + >>> + if (maxReuseLevel > 1) >>> 
+ { >>> + X265_FREE((analysis->interData)->modes); >>> + X265_FREE((analysis->interData)->mvpIdx[0]); >>> + X265_FREE((analysis->interData)->mvpIdx[1]); >>> + X265_FREE((analysis->interData)->mv[0]); >>> + X265_FREE((analysis->interData)->mv[1]); >>> + } >>> >>> if (maxReuseLevel > 4) >>> { >>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp >>> index cf7bfb98d..c87feb84a 100644 >>> --- a/source/encoder/encoder.cpp >>> +++ b/source/encoder/encoder.cpp >>> @@ -4448,8 +4448,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> { >>> if (m_param->bAnalysisType == HEVC_INFO) >>> return; >>> - if (m_param->analysisLoadReuseLevel < 2) >>> - return; >>> >>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >>> *partSizes = NULL; >>> int8_t *cuQPBuf = NULL; >>> @@ -4462,8 +4460,11 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>> >>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->depth); >>> - X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->chromaModes); >>> - X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->partSizes); >>> + if (m_param->analysisLoadReuseLevel > 1) >>> + { >>> + X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->chromaModes); >>> + X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->partSizes); >>> + } >>> if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); } >>> >>> size_t count = 0; >>> @@ -4478,38 +4479,40 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> partSizes[d] = SIZE_2Nx2N; >>> } >>> memset(&(analysis->intraData)->depth[count], depthBuf[d], >>> bytes); >>> - memset(&(analysis->intraData)->chromaModes[count], >>> modeBuf[d], 
bytes); >>> - memset(&(analysis->intraData)->partSizes[count], >>> partSizes[d], bytes); >>> + if (m_param->analysisLoadReuseLevel > 1) >>> + { >>> + memset(&(analysis->intraData)->chromaModes[count], >>> modeBuf[d], bytes); >>> + memset(&(analysis->intraData)->partSizes[count], >>> partSizes[d], bytes); >>> + } >>> if (m_param->rc.cuTree) >>> memset(&(analysis->intraData)->cuQPOff[count], >>> cuQPBuf[d], bytes); >>> count += bytes; >>> } >>> - >>> - if (!m_param->scaleFactor) >>> - { >>> - X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), >>> numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes); >>> - } >>> - else >>> + if (m_param->analysisLoadReuseLevel > 1) >>> { >>> - uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, numCUsLoad * >>> scaledNumPartition); >>> - X265_FREAD(tempLumaBuf, sizeof(uint8_t), numCUsLoad * >>> scaledNumPartition, m_analysisFileIn, intraPic->modes); >>> - for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < numCUsLoad >>> * scaledNumPartition; ctu32Idx++, cnt += factor) >>> - memset(&(analysis->intraData)->modes[cnt], >>> tempLumaBuf[ctu32Idx], factor); >>> - X265_FREE(tempLumaBuf); >>> + if (!m_param->scaleFactor) >>> + { >>> + X265_FREAD((analysis->intraData)->modes, >>> sizeof(uint8_t), numCUsLoad * analysis->numPartitions, m_analysisFileIn, >>> intraPic->modes); >>> + } >>> + else >>> + { >>> + uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, numCUsLoad >>> * scaledNumPartition); >>> + X265_FREAD(tempLumaBuf, sizeof(uint8_t), numCUsLoad * >>> scaledNumPartition, m_analysisFileIn, intraPic->modes); >>> + for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < >>> numCUsLoad * scaledNumPartition; ctu32Idx++, cnt += factor) >>> + memset(&(analysis->intraData)->modes[cnt], >>> tempLumaBuf[ctu32Idx], factor); >>> + X265_FREE(tempLumaBuf); >>> + } >>> } >>> if (m_param->rc.cuTree) >>> X265_FREE(cuQPBuf); >>> X265_FREE(tempBuf); >>> consumedBytes += frameRecordSize; >>> } >>> - >>> else >>> { >>> uint32_t numDir = analysis->sliceType == 
X265_TYPE_P ? 1 : 2; >>> uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 >>> : 3; >>> X265_FREAD((WeightParam*)analysis->wt, sizeof(WeightParam), >>> numPlanes * numDir, m_analysisFileIn, (picIn->analysisData.wt)); >>> - if (m_param->analysisLoadReuseLevel < 2) >>> - return; >>> >>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >>> *partSize = NULL, *mergeFlag = NULL; >>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; >>> @@ -4539,9 +4542,11 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>> >>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->depth); >>> - X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->modes); >>> + if (m_param->analysisLoadReuseLevel > 1) >>> + X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->modes); >>> if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, >>> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); } >>> >>> + >>> if (m_param->analysisLoadReuseLevel > 4) >>> { >>> partSize = modeBuf + depthBytes; >>> @@ -4577,9 +4582,13 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA && >>> depthBuf[d] == 0) >>> depthBuf[d] = 1; >>> memset(&(analysis->interData)->depth[count], >>> depthBuf[d], bytes); >>> - memset(&(analysis->interData)->modes[count], >>> modeBuf[d], bytes); >>> + >>> + if (m_param->analysisLoadReuseLevel > 1) >>> + memset(&(analysis->interData)->modes[count], >>> modeBuf[d], bytes); >>> + >>> if (m_param->rc.cuTree) >>> memset(&(analysis->interData)->cuQPOff[count], >>> cuQPBuf[d], bytes); >>> + >>> if (m_param->analysisLoadReuseLevel > 4) >>> { >>> if (m_param->scaleFactor && modeBuf[d] == >>> MODE_INTRA && partSize[d] == SIZE_NxN) >>> @@ -4643,7 +4652,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* 
>>> analysis, int curPoc, const x >>> } >>> } >>> } >>> - else >>> + else if (m_param->analysisLoadReuseLevel > 1) >>> X265_FREAD((analysis->interData)->ref, sizeof(int32_t), >>> numCUsLoad * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, >>> interPic->ref); >>> >>> consumedBytes += frameRecordSize; >>> @@ -4810,9 +4819,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> >>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == >>> X265_TYPE_I) >>> { >>> - if (m_param->analysisLoadReuseLevel < 2) >>> - return; >>> - >>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >>> *partSizes = NULL; >>> int8_t *cuQPBuf = NULL; >>> >>> @@ -4824,8 +4830,12 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>> >>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->depth); >>> - X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->chromaModes); >>> - X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->partSizes); >>> + >>> + if (m_param->analysisLoadReuseLevel > 1) >>> + { >>> + X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->chromaModes); >>> + X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, intraPic->partSizes); >>> + } >>> if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); } >>> >>> uint32_t count = 0; >>> @@ -4838,36 +4848,45 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> bytes /= 4; >>> numCTUCopied = 4; >>> } >>> - if (partSizes[d] == SIZE_NxN) >>> - partSizes[d] = SIZE_2Nx2N; >>> + if (m_param->analysisLoadReuseLevel > 1) >>> + { >>> + if (partSizes[d] == SIZE_NxN) >>> + partSizes[d] = SIZE_2Nx2N; >>> + } >>> if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || 
>>> (depthBuf[d] && m_param->maxCUSize != 64)) >>> depthBuf[d]--; >>> >>> for (int numCTU = 0; numCTU < numCTUCopied; numCTU++) >>> { >>> memset(&(analysis->intraData)->depth[count], >>> depthBuf[d], bytes); >>> - memset(&(analysis->intraData)->chromaModes[count], >>> modeBuf[d], bytes); >>> - memset(&(analysis->intraData)->partSizes[count], >>> partSizes[d], bytes); >>> + if (m_param->analysisLoadReuseLevel > 1) >>> + { >>> + memset(&(analysis->intraData)->chromaModes[count], >>> modeBuf[d], bytes); >>> + memset(&(analysis->intraData)->partSizes[count], >>> partSizes[d], bytes); >>> + } >>> if (m_param->rc.cuTree) >>> memset(&(analysis->intraData)->cuQPOff[count], >>> cuQPBuf[d], bytes); >>> + >>> count += bytes; >>> d += getCUIndex(&cuLoc, &count, bytes, 1); >>> } >>> } >>> - >>> - cuLoc.evenRowIndex = 0; >>> - cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU; >>> - cuLoc.switchCondition = 0; >>> - uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, >>> analysis->numCUsInFrame * scaledNumPartition); >>> - X265_FREAD(tempLumaBuf, sizeof(uint8_t), >>> analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, >>> intraPic->modes); >>> - uint32_t cnt = 0; >>> - for (uint32_t ctu32Idx = 0; ctu32Idx < analysis->numCUsInFrame >>> * scaledNumPartition; ctu32Idx++) >>> + if (m_param->analysisLoadReuseLevel > 1) >>> { >>> - memset(&(analysis->intraData)->modes[cnt], >>> tempLumaBuf[ctu32Idx], factor); >>> - cnt += factor; >>> - ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0); >>> + cuLoc.evenRowIndex = 0; >>> + cuLoc.oddRowIndex = m_param->num4x4Partitions * >>> cuLoc.widthInCU; >>> + cuLoc.switchCondition = 0; >>> + uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, >>> analysis->numCUsInFrame * scaledNumPartition); >>> + X265_FREAD(tempLumaBuf, sizeof(uint8_t), >>> analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, >>> intraPic->modes); >>> + uint32_t cnt = 0; >>> + for (uint32_t ctu32Idx = 0; ctu32Idx < >>> analysis->numCUsInFrame * 
scaledNumPartition; ctu32Idx++) >>> + { >>> + memset(&(analysis->intraData)->modes[cnt], >>> tempLumaBuf[ctu32Idx], factor); >>> + cnt += factor; >>> + ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0); >>> + } >>> + X265_FREE(tempLumaBuf); >>> } >>> - X265_FREE(tempLumaBuf); >>> if (m_param->rc.cuTree) >>> X265_FREE(cuQPBuf); >>> X265_FREE(tempBuf); >>> @@ -4879,8 +4898,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> uint32_t numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2; >>> uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 >>> : 3; >>> X265_FREAD((WeightParam*)analysis->wt, sizeof(WeightParam), >>> numPlanes * numDir, m_analysisFileIn, (picIn->analysisData.wt)); >>> - if (m_param->analysisLoadReuseLevel < 2) >>> - return; >>> >>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, >>> *partSize = NULL, *mergeFlag = NULL; >>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; >>> @@ -4904,8 +4921,10 @@ void >>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x >>> cuQPBuf = X265_MALLOC(int8_t, depthBytes); >>> >>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->depth); >>> - X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->modes); >>> + if (m_param->analysisLoadReuseLevel > 1) >>> + X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, >>> m_analysisFileIn, interPic->modes); >>> if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), >>> depthBytes, m_analysisFileIn, interPic->cuQPOff); } >>> + >>> if (m_param->analysisLoadReuseLevel > 4) >>> { >>> partSize = modeBuf + depthBytes; >>> @@ -4953,7 +4972,9 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> for (int numCTU = 0; numCTU < numCTUCopied; numCTU++) >>> { >>> memset(&(analysis->interData)->depth[count], >>> writeDepth, bytes); >>> - memset(&(analysis->interData)->modes[count], >>> modeBuf[d], 
bytes); >>> + if (m_param->analysisLoadReuseLevel > 1) >>> + memset(&(analysis->interData)->modes[count], >>> modeBuf[d], bytes); >>> + >>> if (m_param->rc.cuTree) >>> memset(&(analysis->interData)->cuQPOff[count], >>> cuQPBuf[d], bytes); >>> if (m_param->analysisLoadReuseLevel == 10 && >>> bIntraInInter) >>> @@ -5045,7 +5066,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* >>> analysis, int curPoc, const x >>> X265_FREE(tempLumaBuf); >>> } >>> } >>> - else >>> + else if (m_param->analysisLoadReuseLevel > 1) >>> X265_FREAD((analysis->interData)->ref, sizeof(int32_t), >>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, >>> m_analysisFileIn, interPic->ref); >>> >>> consumedBytes += frameRecordSize; >>> @@ -5155,6 +5176,8 @@ int >>> Encoder::validateAnalysisData(x265_analysis_validate* saveParam, int writeFl >>> isIncompatibleReuseLevel = true; >>> else if ((loadLevel >= 2 && loadLevel <= 4) && (saveLevel < 2 >>> || saveLevel > 6)) >>> isIncompatibleReuseLevel = true; >>> + else if (loadLevel == 1 && saveLevel < 1) >>> + isIncompatibleReuseLevel = true; >>> else if (!saveLevel) >>> isIncompatibleReuseLevel = true; >>> >>> @@ -5167,7 +5190,7 @@ int >>> Encoder::validateAnalysisData(x265_analysis_validate* saveParam, int writeFl >>> >>> int bcutree; >>> X265_FREAD(&bcutree, sizeof(int), 1, m_analysisFileIn, >>> &(saveParam->cuTree)); >>> - if (loadLevel == 10 && m_param->rc.cuTree && (!bcutree || >>> saveLevel < 2)) >>> + if (loadLevel >= 1 && m_param->rc.cuTree && (!bcutree || >>> saveLevel < 1)) >>> { >>> x265_log(NULL, X265_LOG_ERROR, "Error reading cu-tree info. >>> Disabling cutree offsets. 
\n"); >>> m_param->rc.cuTree = 0; >>> @@ -5510,7 +5533,7 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> analysis->frameRecordSize += analysis->numCUsInFrame * >>> sizeof(sse_t); >>> } >>> >>> - if (m_param->analysisSaveReuseLevel > 1) >>> + if (m_param->analysisSaveReuseLevel > 0) >>> { >>> >>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType >>> == X265_TYPE_I) >>> @@ -5529,18 +5552,21 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> { >>> depth = ctu->m_cuDepth[absPartIdx]; >>> intraDataCTU->depth[depthBytes] = depth; >>> + if (m_param->analysisSaveReuseLevel > 1) >>> + { >>> + mode = ctu->m_chromaIntraDir[absPartIdx]; >>> + intraDataCTU->chromaModes[depthBytes] = mode; >>> >>> - mode = ctu->m_chromaIntraDir[absPartIdx]; >>> - intraDataCTU->chromaModes[depthBytes] = mode; >>> - >>> - partSize = ctu->m_partSize[absPartIdx]; >>> - intraDataCTU->partSizes[depthBytes] = partSize; >>> - >>> + partSize = ctu->m_partSize[absPartIdx]; >>> + intraDataCTU->partSizes[depthBytes] = partSize; >>> + } >>> if (m_param->rc.cuTree) >>> intraDataCTU->cuQPOff[depthBytes] = >>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); >>> + >>> absPartIdx += ctu->m_numPartitions >> (depth * 2); >>> } >>> - memcpy(&intraDataCTU->modes[ctu->m_cuAddr * >>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* >>> ctu->m_numPartitions); >>> + if (m_param->analysisSaveReuseLevel > 1) >>> + memcpy(&intraDataCTU->modes[ctu->m_cuAddr * >>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* >>> ctu->m_numPartitions); >>> } >>> } >>> else >>> @@ -5561,12 +5587,14 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> { >>> depth = ctu->m_cuDepth[absPartIdx]; >>> interDataCTU->depth[depthBytes] = depth; >>> + if (m_param->analysisSaveReuseLevel > 1) >>> + { >>> + predMode = ctu->m_predMode[absPartIdx]; >>> + if (m_param->analysisSaveReuseLevel != 
10 && >>> ctu->m_refIdx[1][absPartIdx] != -1) >>> + predMode = 4; // used as indicator if the >>> block is coded as bidir >>> >>> - predMode = ctu->m_predMode[absPartIdx]; >>> - if (m_param->analysisSaveReuseLevel != 10 && >>> ctu->m_refIdx[1][absPartIdx] != -1) >>> - predMode = 4; // used as indicator if the block >>> is coded as bidir >>> - >>> - interDataCTU->modes[depthBytes] = predMode; >>> + interDataCTU->modes[depthBytes] = predMode; >>> + } >>> if (m_param->rc.cuTree) >>> interDataCTU->cuQPOff[depthBytes] = >>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); >>> >>> @@ -5603,17 +5631,22 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr * >>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* >>> ctu->m_numPartitions); >>> } >>> } >>> - >>> - if ((analysis->sliceType == X265_TYPE_IDR || >>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree) >>> + if ((analysis->sliceType == X265_TYPE_IDR || >>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree && >>> m_param->analysisSaveReuseLevel == 1) >>> + analysis->frameRecordSize += depthBytes + (sizeof(int8_t)* >>> depthBytes); >>> + else if ((analysis->sliceType == X265_TYPE_IDR || >>> analysis->sliceType == X265_TYPE_I) && m_param->analysisSaveReuseLevel == 1) >>> + analysis->frameRecordSize += depthBytes; >>> + else if ((analysis->sliceType == X265_TYPE_IDR || >>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree) >>> analysis->frameRecordSize += sizeof(uint8_t)* >>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + >>> (sizeof(int8_t) * depthBytes); >>> else if (analysis->sliceType == X265_TYPE_IDR || >>> analysis->sliceType == X265_TYPE_I) >>> analysis->frameRecordSize += sizeof(uint8_t)* >>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3; >>> else >>> { >>> /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag >>> */ >>> - analysis->frameRecordSize 
+= depthBytes * 2; >>> + analysis->frameRecordSize += depthBytes; >>> if (m_param->rc.cuTree) >>> - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes); >>> + analysis->frameRecordSize += (sizeof(int8_t) * >>> depthBytes); >>> + if (m_param->analysisSaveReuseLevel > 1) >>> + analysis->frameRecordSize += depthBytes; >>> if (m_param->analysisSaveReuseLevel > 4) >>> analysis->frameRecordSize += (depthBytes * 2); >>> >>> @@ -5627,7 +5660,7 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> if (bIntraInInter) >>> analysis->frameRecordSize += sizeof(uint8_t)* >>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes; >>> } >>> - else >>> + else if (m_param->analysisSaveReuseLevel > 1) >>> analysis->frameRecordSize += sizeof(int32_t)* >>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir; >>> } >>> analysis->depthBytes = depthBytes; >>> @@ -5661,44 +5694,58 @@ void >>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD >>> X265_FWRITE((WeightParam*)analysis->wt, sizeof(WeightParam), >>> numPlanes * numDir, m_analysisFileOut); >>> >>> if (m_param->analysisSaveReuseLevel < 2) >>> - return; >>> - >>> - if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == >>> X265_TYPE_I) >>> { >>> - X265_FWRITE((analysis->intraData)->depth, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> - X265_FWRITE((analysis->intraData)->chromaModes, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> - X265_FWRITE((analysis->intraData)->partSizes, sizeof(char), >>> depthBytes, m_analysisFileOut); >>> - if (m_param->rc.cuTree) >>> - X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), >>> depthBytes, m_analysisFileOut); >>> - X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t), >>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut); >>> + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType >>> == X265_TYPE_I) >>> + { >>> + 
X265_FWRITE((analysis->intraData)->depth, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> + if (m_param->rc.cuTree) >>> + X265_FWRITE((analysis->intraData)->cuQPOff, >>> sizeof(int8_t), depthBytes, m_analysisFileOut); >>> + } >>> + else >>> + { >>> + X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> + if (m_param->rc.cuTree) >>> + X265_FWRITE((analysis->interData)->cuQPOff, >>> sizeof(int8_t), depthBytes, m_analysisFileOut); >>> + } >>> } >>> else >>> { >>> - X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> - X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> - if (m_param->rc.cuTree) >>> - X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), >>> depthBytes, m_analysisFileOut); >>> - if (m_param->analysisSaveReuseLevel > 4) >>> + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType >>> == X265_TYPE_I) >>> { >>> - X265_FWRITE((analysis->interData)->partSize, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> - X265_FWRITE((analysis->interData)->mergeFlag, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> - if (m_param->analysisSaveReuseLevel == 10) >>> + X265_FWRITE((analysis->intraData)->depth, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> + X265_FWRITE((analysis->intraData)->chromaModes, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> + X265_FWRITE((analysis->intraData)->partSizes, sizeof(char), >>> depthBytes, m_analysisFileOut); >>> + if (m_param->rc.cuTree) >>> + X265_FWRITE((analysis->intraData)->cuQPOff, >>> sizeof(int8_t), depthBytes, m_analysisFileOut); >>> + X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t), >>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut); >>> + } >>> + else >>> + { >>> + X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> + 
X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> + if (m_param->rc.cuTree) >>> + X265_FWRITE((analysis->interData)->cuQPOff, >>> sizeof(int8_t), depthBytes, m_analysisFileOut); >>> + if (m_param->analysisSaveReuseLevel > 4) >>> { >>> - X265_FWRITE((analysis->interData)->interDir, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> - if (bIntraInInter) >>> X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> - for (uint32_t dir = 0; dir < numDir; dir++) >>> + X265_FWRITE((analysis->interData)->partSize, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> + X265_FWRITE((analysis->interData)->mergeFlag, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> + if (m_param->analysisSaveReuseLevel == 10) >>> { >>> - X265_FWRITE((analysis->interData)->mvpIdx[dir], >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> - X265_FWRITE((analysis->interData)->refIdx[dir], >>> sizeof(int8_t), depthBytes, m_analysisFileOut); >>> - X265_FWRITE((analysis->interData)->mv[dir], >>> sizeof(MV), depthBytes, m_analysisFileOut); >>> + X265_FWRITE((analysis->interData)->interDir, >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> + if (bIntraInInter) >>> X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t), >>> depthBytes, m_analysisFileOut); >>> + for (uint32_t dir = 0; dir < numDir; dir++) >>> + { >>> + X265_FWRITE((analysis->interData)->mvpIdx[dir], >>> sizeof(uint8_t), depthBytes, m_analysisFileOut); >>> + X265_FWRITE((analysis->interData)->refIdx[dir], >>> sizeof(int8_t), depthBytes, m_analysisFileOut); >>> + X265_FWRITE((analysis->interData)->mv[dir], >>> sizeof(MV), depthBytes, m_analysisFileOut); >>> + } >>> + if (bIntraInInter) >>> + X265_FWRITE((analysis->intraData)->modes, >>> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, >>> m_analysisFileOut); >>> } >>> - if (bIntraInInter) >>> - X265_FWRITE((analysis->intraData)->modes, 
>>> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, >>> m_analysisFileOut); >>> } >>> + if (m_param->analysisSaveReuseLevel > 1 && >>> m_param->analysisSaveReuseLevel != 10) >>> + X265_FWRITE((analysis->interData)->ref, >>> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * >>> numDir, m_analysisFileOut); >>> } >>> - if (m_param->analysisSaveReuseLevel != 10) >>> - X265_FWRITE((analysis->interData)->ref, sizeof(int32_t), >>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, >>> m_analysisFileOut); >>> - >>> } >>> #undef X265_FWRITE >>> } >>> -- >>> 2.18.2 >>> >>> _______________________________________________ >>> x265-devel mailing list >>> x265-devel@videolan.org >>> https://mailman.videolan.org/listinfo/x265-devel >>> >> >> >> -- >> Regards, >> *Aruna Matheswaran,* >> Video Codec Engineer, >> Media & AI analytics BU, >> >> >> >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel >> > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > -- Regards, *Aruna Matheswaran,* Video Codec Engineer, Media & AI analytics BU,
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel