As expected, there is not much change in performance, since SAO is a far less time-consuming module than others such as look-ahead and motion estimation, and I have modified only one function in SAO. I tested one 4K video (tearsofsteel-4k-1000f-s214.y4m), which has 1214 frames, with the medium preset, using Intel VTune.
*Before: 2545,02 (total cpu time)* *After: 2536,95 (total CPU time)* Thanks Ashok On Mon, Jan 18, 2016 at 9:39 PM, Pradeep Ramachandran < [email protected]> wrote: > Ashok, > What is the impact on performance that you expect from these changes to > SAO? Can you run encodes with 4K videos in ABR mode for ultrafast, medium, > and veryslow and share the impact of performance if we expect an impact > (which I think there will be)? > > Pradeep Ramachandran, PhD > Solution Architect at www.multicorewareinc.com/ > Adjunct Faculty at www.cse.iitm.ac.in/ > pradeeprama.info/ > Ph: +91 99627 82018 > > On Mon, Jan 18, 2016 at 9:21 PM, <[email protected]> wrote: > >> # HG changeset patch >> # User Ashok Kumar Mishra<[email protected]> >> # Date 1453132285 -19800 >> # Mon Jan 18 21:21:25 2016 +0530 >> # Node ID 0f89b83c6fe64181296a51392ff52236dc6a934e >> # Parent add820b8cde0fc82a643fd32717def5a3e2f3888 >> SAO: perform merge distortion and cost calculation in one loop >> >> diff -r add820b8cde0 -r 0f89b83c6fe6 source/encoder/sao.cpp >> --- a/source/encoder/sao.cpp Mon Jan 18 21:20:57 2016 +0530 >> +++ b/source/encoder/sao.cpp Mon Jan 18 21:21:25 2016 +0530 >> @@ -758,16 +758,6 @@ >> std::swap(m_tmpL1[2], m_tmpL2[2]); >> } >> >> -void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* >> saoUnitSrc) >> -{ >> - saoUnitDst->mergeMode = saoUnitSrc->mergeMode; >> - saoUnitDst->typeIdx = saoUnitSrc->typeIdx; >> - saoUnitDst->bandPos = saoUnitSrc->bandPos; >> - >> - for (int i = 0; i < SAO_NUM_OFFSET; i++) >> - saoUnitDst->offset[i] = saoUnitSrc->offset[i]; >> -} >> - >> /* Calculate SAO statistics for current CTU without non-crossing slice */ >> void SAO::calcSaoStatsCu(int addr, int plane) >> { >> @@ -1233,158 +1223,12 @@ >> m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] = >> m_numNoSao[1] / ((double)numctus); >> } >> >> -void SAO::rdoSaoUnitRow(SAOParam* saoParam, int idxY) >> -{ >> - SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2]; >> - double lambda[3] = {m_lumaLambda, 
m_chromaLambda, m_chromaLambda}; >> - bool allowMerge[2]; // left, up >> - allowMerge[1] = (idxY > 0); >> - >> - for (int idxX = 0; idxX < m_numCuInWidth; idxX++) >> - { >> - int addr = idxX + idxY * m_numCuInWidth; >> - int addrUp = idxY ? addr - m_numCuInWidth : -1; >> - int addrLeft = idxX ? addr - 1 : -1; >> - allowMerge[0] = (idxX > 0); >> - >> - m_entropyCoder.load(m_rdContexts.cur); >> - if (allowMerge[0]) >> - m_entropyCoder.codeSaoMerge(0); >> - if (allowMerge[1]) >> - m_entropyCoder.codeSaoMerge(0); >> - m_entropyCoder.store(m_rdContexts.temp); >> - >> - // reset stats Y, Cb, Cr >> - X265_CHECK(sizeof(PerPlane) == (sizeof(int32_t) * (NUM_PLANE * >> MAX_NUM_SAO_TYPE * MAX_NUM_SAO_CLASS)), "Found Padding space in struct >> PerPlane"); >> - >> - // TODO: Confirm the address space is continuous >> - if (m_param->bSaoNonDeblocked) >> - { >> - memcpy(m_count, m_countPreDblk[addr], sizeof(m_count)); >> - memcpy(m_offsetOrg, m_offsetOrgPreDblk[addr], >> sizeof(m_offsetOrg)); >> - } >> - else >> - { >> - memset(m_count, 0, sizeof(m_count)); >> - memset(m_offsetOrg, 0, sizeof(m_offsetOrg)); >> - } >> - >> - saoParam->ctuParam[0][addr].reset(); >> - saoParam->ctuParam[1][addr].reset(); >> - saoParam->ctuParam[2][addr].reset(); >> - >> - if (saoParam->bSaoFlag[0]) >> - calcSaoStatsCu(addr, 0); >> - >> - if (saoParam->bSaoFlag[1]) >> - { >> - calcSaoStatsCu(addr, 1); >> - calcSaoStatsCu(addr, 2); >> - } >> - >> - double mergeDist[NUM_MERGE_MODE] = {0.0, 0.0, 0.0}; >> - saoLumaComponentParamDist(saoParam, addr, mergeDist); >> - if (m_chromaFormat != X265_CSP_I400) >> - saoChromaComponentParamDist(saoParam, addr, mergeDist); >> - >> - // merge left or merge up >> - for (int plane = 0; plane < 3; plane++) >> - { >> - for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++) >> - { >> - SaoCtuParam* mergeSrcParam = NULL; >> - if (addrLeft >= 0 && mergeIdx == 0) >> - mergeSrcParam = >> &(saoParam->ctuParam[plane][addrLeft]); >> - else if (addrUp >= 0 && mergeIdx == 1) >> - 
mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]); >> - if (mergeSrcParam) >> - { >> - int64_t estDist = 0; >> - int typeIdx = mergeSrcParam->typeIdx; >> - if (typeIdx >= 0) >> - { >> - int bandPos = (typeIdx == SAO_BO) ? >> mergeSrcParam->bandPos : 0; >> - for (int classIdx = 0; classIdx < >> SAO_NUM_OFFSET; classIdx++) >> - { >> - int mergeOffset = >> mergeSrcParam->offset[classIdx]; >> - estDist += >> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, >> m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]); >> - } >> - } >> - >> - copySaoUnit(&mergeSaoParam[plane][mergeIdx], >> mergeSrcParam); >> - mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx >> ? SAO_MERGE_UP : SAO_MERGE_LEFT; >> - mergeDist[mergeIdx + 1] += ((double)estDist / >> lambda[plane]); >> - } >> - } >> - } >> - >> - if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1]) >> - { >> - // Cost of new SAO_params >> - m_entropyCoder.load(m_rdContexts.cur); >> - m_entropyCoder.resetBits(); >> - if (allowMerge[0]) >> - m_entropyCoder.codeSaoMerge(0); >> - if (allowMerge[1]) >> - m_entropyCoder.codeSaoMerge(0); >> - for (int plane = 0; plane < 3; plane++) >> - { >> - if (saoParam->bSaoFlag[plane > 0]) >> - >> m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane); >> - } >> - >> - uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); >> - double bestCost = mergeDist[0] + (double)rate; >> - m_entropyCoder.store(m_rdContexts.temp); >> - >> - // Cost of Merge >> - for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx) >> - { >> - if (!allowMerge[mergeIdx]) >> - continue; >> - >> - m_entropyCoder.load(m_rdContexts.cur); >> - m_entropyCoder.resetBits(); >> - if (allowMerge[0]) >> - m_entropyCoder.codeSaoMerge(1 - mergeIdx); >> - if (allowMerge[1] && (mergeIdx == 1)) >> - m_entropyCoder.codeSaoMerge(1); >> - >> - rate = m_entropyCoder.getNumberOfWrittenBits(); >> - double mergeCost = mergeDist[mergeIdx + 1] + >> (double)rate; >> - if (mergeCost < bestCost) >> - { >> - 
SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : >> SAO_MERGE_LEFT; >> - bestCost = mergeCost; >> - m_entropyCoder.store(m_rdContexts.temp); >> - for (int plane = 0; plane < 3; plane++) >> - { >> - mergeSaoParam[plane][mergeIdx].mergeMode = >> mergeMode; >> - if (saoParam->bSaoFlag[plane > 0]) >> - >> copySaoUnit(&saoParam->ctuParam[plane][addr], >> &mergeSaoParam[plane][mergeIdx]); >> - } >> - } >> - } >> - >> - if (saoParam->ctuParam[0][addr].typeIdx < 0) >> - m_numNoSao[0]++; >> - if (m_chromaFormat != X265_CSP_I400 && >> saoParam->ctuParam[1][addr].typeIdx < 0) >> - m_numNoSao[1]++; >> - >> - m_entropyCoder.load(m_rdContexts.temp); >> - m_entropyCoder.store(m_rdContexts.cur); >> - } >> - } >> -} >> - >> void SAO::rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, >> int addr) >> { >> - SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2]; >> double lambda[3] = {m_lumaLambda, m_chromaLambda, m_chromaLambda}; >> const bool allowMerge[2] = {(idxX != 0), (rowBaseAddr != 0)}; // >> left, up >> >> - const int addrUp = rowBaseAddr ? addr - m_numCuInWidth : -1; >> - const int addrLeft = idxX ? addr - 1 : -1; >> + const int addrMerge[2] = {(idxX ? addr - 1 : -1), (rowBaseAddr ? >> addr - m_numCuInWidth : -1)};// left, up >> >> bool chroma = m_param->internalCsp != X265_CSP_I400; >> int planes = chroma ? 
3 : 1; >> @@ -1425,45 +1269,14 @@ >> calcSaoStatsCu(addr, 1); >> calcSaoStatsCu(addr, 2); >> saoStatsInitialOffset(1); >> - saoStatsInitialOffset(2); >> +// saoStatsInitialOffset(2); >> } >> >> - double mergeDist[NUM_MERGE_MODE] = {0.0, 0.0, 0.0}; >> + double mergeDist[NUM_MERGE_MODE] = { 0.0 }; >> saoLumaComponentParamDist(saoParam, addr, mergeDist); >> if (chroma) >> saoChromaComponentParamDist(saoParam, addr, mergeDist); >> >> - // merge left or merge up >> - for (int plane = 0; plane < planes; plane++) >> - { >> - for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++) >> - { >> - SaoCtuParam* mergeSrcParam = NULL; >> - if (addrLeft >= 0 && mergeIdx == 0) >> - mergeSrcParam = &(saoParam->ctuParam[plane][addrLeft]); >> - else if (addrUp >= 0 && mergeIdx == 1) >> - mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]); >> - if (mergeSrcParam) >> - { >> - int64_t estDist = 0; >> - int typeIdx = mergeSrcParam->typeIdx; >> - if (typeIdx >= 0) >> - { >> - int bandPos = (typeIdx == SAO_BO) ? >> mergeSrcParam->bandPos : 0; >> - for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; >> classIdx++) >> - { >> - int mergeOffset = >> mergeSrcParam->offset[classIdx]; >> - estDist += >> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, >> m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]); >> - } >> - } >> - >> - copySaoUnit(&mergeSaoParam[plane][mergeIdx], >> mergeSrcParam); >> - mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx ? 
>> SAO_MERGE_UP : SAO_MERGE_LEFT; >> - mergeDist[mergeIdx + 1] += ((double)estDist / >> lambda[plane]); >> - } >> - } >> - } >> - >> if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1]) >> { >> // Cost of new SAO_params >> @@ -1483,12 +1296,31 @@ >> double bestCost = mergeDist[0] + (double)rate; >> m_entropyCoder.store(m_rdContexts.temp); >> >> - // Cost of Merge >> + // Cost of merge left or Up >> for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx) >> { >> if (!allowMerge[mergeIdx]) >> continue; >> >> + for (int plane = 0; plane < 3; plane++) >> + { >> + int64_t estDist = 0; >> + SaoCtuParam* mergeSrcParam = >> &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]); >> + int typeIdx = mergeSrcParam->typeIdx; >> + if (typeIdx >= 0) >> + { >> + int bandPos = (typeIdx == SAO_BO) ? >> mergeSrcParam->bandPos : 0; >> + for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; >> classIdx++) >> + { >> + int mergeOffset = >> mergeSrcParam->offset[classIdx]; >> + estDist += >> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, >> m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]); >> + } >> + } >> + >> + mergeDist[mergeIdx + 1] += ((double)estDist / >> lambda[plane]); >> + } >> + >> + >> m_entropyCoder.load(m_rdContexts.cur); >> m_entropyCoder.resetBits(); >> if (allowMerge[0]) >> @@ -1505,9 +1337,17 @@ >> m_entropyCoder.store(m_rdContexts.temp); >> for (int plane = 0; plane < planes; plane++) >> { >> - mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode; >> if (saoParam->bSaoFlag[plane > 0]) >> - copySaoUnit(&saoParam->ctuParam[plane][addr], >> &mergeSaoParam[plane][mergeIdx]); >> + { >> + SaoCtuParam* dstCtuParam = >> &saoParam->ctuParam[plane][addr]; >> + SaoCtuParam* mergeSrcParam = >> &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]); >> + dstCtuParam->mergeMode = mergeMode; >> + dstCtuParam->typeIdx = mergeSrcParam->typeIdx; >> + dstCtuParam->bandPos = mergeSrcParam->bandPos; >> + >> + for (int i = 0; i < SAO_NUM_OFFSET; i++) >> + dstCtuParam->offset[i] = >> 
mergeSrcParam->offset[i]; >> + } >> } >> } >> } >> @@ -1524,75 +1364,81 @@ >> >> // Rounds the division of initial offsets by the number of samples in >> // each of the statistics table entries. >> -void SAO::saoStatsInitialOffset(int plane) >> +void SAO::saoStatsInitialOffset(int planes) >> { >> // EO >> - for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++) >> + for (int plane = planes; plane <= 2*planes; plane++) >> { >> - for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++) >> + for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++) >> { >> - int32_t count = m_count[plane][typeIdx][classIdx]; >> - int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx]; >> - int32_t& offsetOut = m_offset[plane][typeIdx][classIdx]; >> + for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++) >> + { >> + int32_t count = m_count[plane][typeIdx][classIdx]; >> + int32_t& offsetOrg = >> m_offsetOrg[plane][typeIdx][classIdx]; >> + int32_t& offsetOut = m_offset[plane][typeIdx][classIdx]; >> + >> + if (count) >> + { >> + offsetOut = roundIBDI(offsetOrg, count << >> SAO_BIT_INC); >> + offsetOut = x265_clip3(-OFFSET_THRESH + 1, >> OFFSET_THRESH - 1, offsetOut); >> + >> + if (classIdx < 3) >> + offsetOut = X265_MAX(offsetOut, 0); >> + else >> + offsetOut = X265_MIN(offsetOut, 0); >> + } >> + } >> + } >> + } >> + >> + // BO >> + for (int plane = planes; plane <= 2*planes; plane++) >> + { >> + for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; >> classIdx++) >> + { >> + int32_t count = m_count[plane][SAO_BO][classIdx]; >> + int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx]; >> + int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx]; >> >> if (count) >> { >> offsetOut = roundIBDI(offsetOrg, count << SAO_BIT_INC); >> offsetOut = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH >> - 1, offsetOut); >> - >> - if (classIdx < 3) >> - offsetOut = X265_MAX(offsetOut, 0); >> - else >> - offsetOut = X265_MIN(offsetOut, 0); >> } >> } >> } >> - // BO >> 
- for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++) >> - { >> - int32_t count = m_count[plane][SAO_BO][classIdx]; >> - int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx]; >> - int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx]; >> - >> - if (count) >> - { >> - offsetOut = roundIBDI(offsetOrg, count << SAO_BIT_INC); >> - offsetOut = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - >> 1, offsetOut); >> - } >> - } >> } >> >> -inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda, >> int offset, int32_t count, int32_t offsetOrg, int32_t* >> currentDistortionTableBo, double* currentRdCostTableBo) >> +inline int SAO::estIterOffset(int typeIdx, double lambda, int offset, >> int32_t count, int32_t offsetOrg, int& distBOClasses, double& costBOClasses) >> { >> - int offsetOut = 0; >> + int bestOffset = 0; >> >> - // Assuming sending quantized value 0 results in zero offset and >> sending the value zero needs 1 bit. entropy coder can be used to measure >> the exact rate here. >> - double tempMinCost = lambda; >> + // Assuming sending quantized value 0 results in zero offset and >> sending the value zero needs 1 bit. >> + // entropy coder can be used to measure the exact rate here. >> + double bestCost = lambda; >> while (offset != 0) >> { >> // Calculate the bits required for signalling the offset >> - int tempRate = (typeIdx == SAO_BO) ? (abs(offset) + 2) : >> (abs(offset) + 1); >> + int rate = (typeIdx == SAO_BO) ? 
(abs(offset) + 2) : >> (abs(offset) + 1); >> if (abs(offset) == OFFSET_THRESH - 1) >> - tempRate--; >> + rate--; >> >> // Do the dequntization before distorion calculation >> - int tempOffset = offset << SAO_BIT_INC; >> - int64_t tempDist = estSaoDist(count, tempOffset, offsetOrg); >> - double tempCost = ((double)tempDist + lambda * >> (double)tempRate); >> - if (tempCost < tempMinCost) >> + int64_t dist = estSaoDist(count, offset << SAO_BIT_INC, >> offsetOrg); >> + double cost = ((double)dist + lambda * (double)rate); >> + if (cost < bestCost) >> { >> - tempMinCost = tempCost; >> - offsetOut = offset; >> + bestCost = cost; >> + bestOffset = offset; >> if (typeIdx == SAO_BO) >> { >> - currentDistortionTableBo[classIdx - 1] = (int)tempDist; >> - currentRdCostTableBo[classIdx - 1] = tempCost; >> + distBOClasses = (int)dist; >> + costBOClasses = bestCost; >> } >> } >> offset = (offset > 0) ? (offset - 1) : (offset + 1); >> } >> - >> - return offsetOut; >> + return bestOffset; >> } >> >> void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr, >> double* mergeDist) >> @@ -1602,8 +1448,8 @@ >> >> SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr]; >> >> - int currentDistortionTableBo[MAX_NUM_SAO_CLASS]; >> - double currentRdCostTableBo[MAX_NUM_SAO_CLASS]; >> + int distBOClasses[MAX_NUM_SAO_CLASS]; >> + double costBOClasses[MAX_NUM_SAO_CLASS]; >> >> m_entropyCoder.load(m_rdContexts.temp); >> m_entropyCoder.resetBits(); >> @@ -1621,15 +1467,9 @@ >> int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx]; >> int32_t& offsetOut = m_offset[0][typeIdx][classIdx]; >> >> - if (count) >> - { >> - offsetOut = estIterOffset(typeIdx, classIdx, >> m_lumaLambda, offsetOut, count, offsetOrg, currentDistortionTableBo, >> currentRdCostTableBo); >> - } >> - else >> - { >> - offsetOrg = 0; >> - offsetOut = 0; >> - } >> + if (offsetOut) >> + offsetOut = estIterOffset(typeIdx, m_lumaLambda, >> offsetOut, count, offsetOrg, distBOClasses[0], costBOClasses[0]); >> + >> estDist += 
estSaoDist(count, (int)offsetOut << SAO_BIT_INC, >> offsetOrg); >> } >> >> @@ -1665,44 +1505,37 @@ >> int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx]; >> int32_t& offsetOut = m_offset[0][SAO_BO][classIdx]; >> >> - currentDistortionTableBo[classIdx - 1] = 0; >> - currentRdCostTableBo[classIdx - 1] = m_lumaLambda; >> + distBOClasses[classIdx - 1] = 0; >> + costBOClasses[classIdx - 1] = m_lumaLambda; >> >> - if (count) >> - { >> - offsetOut = estIterOffset(SAO_BO, classIdx, m_lumaLambda, >> offsetOut, count, offsetOrg, currentDistortionTableBo, >> currentRdCostTableBo); >> - } >> - else >> - { >> - offsetOrg = 0; >> - offsetOut = 0; >> - } >> + if (offsetOut) >> + offsetOut = estIterOffset(SAO_BO, m_lumaLambda, offsetOut, >> count, offsetOrg, distBOClasses[classIdx - 1], costBOClasses[classIdx - 1]); >> } >> >> // Estimate Best Position >> - double bestRDCostTableBo = MAX_DOUBLE; >> - int bestClassTableBo = 0; >> + double bestRDCostBO = MAX_DOUBLE; >> + int bestClassBO = 0; >> >> for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++) >> { >> double currentRDCost = 0.0; >> for (int j = i; j < i + SAO_BO_LEN; j++) >> - currentRDCost += currentRdCostTableBo[j]; >> + currentRDCost += costBOClasses[j]; >> >> - if (currentRDCost < bestRDCostTableBo) >> + if (currentRDCost < bestRDCostBO) >> { >> - bestRDCostTableBo = currentRDCost; >> - bestClassTableBo = i; >> + bestRDCostBO = currentRDCost; >> + bestClassBO = i; >> } >> } >> >> estDist = 0; >> - for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + >> SAO_BO_LEN; classIdx++) >> - estDist += currentDistortionTableBo[classIdx]; >> + for (int classIdx = bestClassBO; classIdx < bestClassBO + >> SAO_BO_LEN; classIdx++) >> + estDist += distBOClasses[classIdx]; >> >> m_entropyCoder.load(m_rdContexts.temp); >> m_entropyCoder.resetBits(); >> - m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + >> (bestClassTableBo + 1), bestClassTableBo, 0); >> + m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + 
(bestClassBO + >> 1), bestClassBO, 0); >> >> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); >> double cost = (double)estDist + m_lumaLambda * (double)estRate; >> @@ -1714,9 +1547,9 @@ >> >> lclCtuParam->mergeMode = SAO_MERGE_NONE; >> lclCtuParam->typeIdx = SAO_BO; >> - lclCtuParam->bandPos = bestClassTableBo; >> + lclCtuParam->bandPos = bestClassBO; >> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) >> - lclCtuParam->offset[classIdx] = >> (int)m_offset[0][SAO_BO][classIdx + bestClassTableBo + 1]; >> + lclCtuParam->offset[classIdx] = >> (int)m_offset[0][SAO_BO][classIdx + bestClassBO + 1]; >> } >> >> mergeDist[0] = ((double)bestDist / m_lumaLambda); >> @@ -1732,9 +1565,9 @@ >> >> SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], >> &saoParam->ctuParam[2][addr] }; >> >> - double currentRdCostTableBo[MAX_NUM_SAO_CLASS]; >> - int currentDistortionTableBo[MAX_NUM_SAO_CLASS]; >> - int bestClassTableBo[2] = { 0, 0 }; >> + double costBOClasses[MAX_NUM_SAO_CLASS]; >> + int distBOClasses[MAX_NUM_SAO_CLASS]; >> + int bestClassBO[2] = { 0, 0 }; >> >> m_entropyCoder.load(m_rdContexts.temp); >> m_entropyCoder.resetBits(); >> @@ -1754,15 +1587,8 @@ >> int32_t& offsetOrg = >> m_offsetOrg[compIdx][typeIdx][classIdx]; >> int32_t& offsetOut = >> m_offset[compIdx][typeIdx][classIdx]; >> >> - if (count) >> - { >> - offsetOut = estIterOffset(typeIdx, classIdx, >> m_chromaLambda, offsetOut, count, offsetOrg, currentDistortionTableBo, >> currentRdCostTableBo); >> - } >> - else >> - { >> - offsetOrg = 0; >> - offsetOut = 0; >> - } >> + if (offsetOut) >> + offsetOut = estIterOffset(typeIdx, m_chromaLambda, >> offsetOut, count, offsetOrg, distBOClasses[0], costBOClasses[0]); >> >> estDist[compIdx - 1] += estSaoDist(count, (int)offsetOut >> << SAO_BIT_INC, offsetOrg); >> } >> @@ -1772,9 +1598,7 @@ >> m_entropyCoder.resetBits(); >> >> for (int compIdx = 0; compIdx < 2; compIdx++) >> - { >> m_entropyCoder.codeSaoOffsetEO(m_offset[compIdx + >> 1][typeIdx] + 
1, typeIdx, compIdx + 1); >> - } >> >> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); >> double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda >> * (double)estRate; >> @@ -1805,7 +1629,7 @@ >> // Estimate Best Position >> for (int compIdx = 1; compIdx < 3; compIdx++) >> { >> - double bestRDCostTableBo = MAX_DOUBLE; >> + double bestRDCostBO = MAX_DOUBLE; >> >> for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; >> classIdx++) >> { >> @@ -1813,45 +1637,36 @@ >> int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx]; >> int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx]; >> >> - currentDistortionTableBo[classIdx - 1] = 0; >> - currentRdCostTableBo[classIdx - 1] = m_chromaLambda; >> + distBOClasses[classIdx - 1] = 0; >> + costBOClasses[classIdx - 1] = m_chromaLambda; >> >> - if (count) >> - { >> - offsetOut = estIterOffset(SAO_BO, classIdx, >> m_chromaLambda, offsetOut, count, offsetOrg, currentDistortionTableBo, >> currentRdCostTableBo); >> - } >> - else >> - { >> - offsetOrg = 0; >> - offsetOut = 0; >> - } >> + if (offsetOut) >> + offsetOut = estIterOffset(SAO_BO, m_chromaLambda, >> offsetOut, count, offsetOrg, distBOClasses[classIdx - 1], >> costBOClasses[classIdx - 1]); >> } >> >> for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++) >> { >> double currentRDCost = 0.0; >> for (int j = i; j < i + SAO_BO_LEN; j++) >> - currentRDCost += currentRdCostTableBo[j]; >> + currentRDCost += costBOClasses[j]; >> >> - if (currentRDCost < bestRDCostTableBo) >> + if (currentRDCost < bestRDCostBO) >> { >> - bestRDCostTableBo = currentRDCost; >> - bestClassTableBo[compIdx - 1] = i; >> + bestRDCostBO = currentRDCost; >> + bestClassBO[compIdx - 1] = i; >> } >> } >> >> estDist[compIdx - 1] = 0; >> - for (int classIdx = bestClassTableBo[compIdx - 1]; classIdx < >> bestClassTableBo[compIdx - 1] + SAO_BO_LEN; classIdx++) >> - estDist[compIdx - 1] += currentDistortionTableBo[classIdx]; >> + for (int classIdx = bestClassBO[compIdx - 1]; 
classIdx < >> bestClassBO[compIdx - 1] + SAO_BO_LEN; classIdx++) >> + estDist[compIdx - 1] += distBOClasses[classIdx]; >> } >> >> m_entropyCoder.load(m_rdContexts.temp); >> m_entropyCoder.resetBits(); >> >> for (int compIdx = 0; compIdx < 2; compIdx++) >> - { >> - m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + >> (bestClassTableBo[compIdx] + 1), bestClassTableBo[compIdx], compIdx + 1); >> - } >> + m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + >> (bestClassBO[compIdx] + 1), bestClassBO[compIdx], compIdx + 1); >> >> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); >> double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda * >> (double)estRate; >> @@ -1865,9 +1680,9 @@ >> { >> lclCtuParam[compIdx]->mergeMode = SAO_MERGE_NONE; >> lclCtuParam[compIdx]->typeIdx = SAO_BO; >> - lclCtuParam[compIdx]->bandPos = bestClassTableBo[compIdx]; >> + lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx]; >> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) >> - lclCtuParam[compIdx]->offset[classIdx] = >> (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassTableBo[compIdx] + >> 1]; >> + lclCtuParam[compIdx]->offset[classIdx] = >> (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx] + 1]; >> } >> } >> >> diff -r add820b8cde0 -r 0f89b83c6fe6 source/encoder/sao.h >> --- a/source/encoder/sao.h Mon Jan 18 21:20:57 2016 +0530 >> +++ b/source/encoder/sao.h Mon Jan 18 21:21:25 2016 +0530 >> @@ -136,7 +136,7 @@ >> void processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX); >> void processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int >> idxX); >> >> - void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* >> saoUnitSrc); >> +// void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* >> saoUnitSrc); >> >> void calcSaoStatsCu(int addr, int plane); >> void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY); >> @@ -144,10 +144,10 @@ >> void saoLumaComponentParamDist(SAOParam* 
saoParam, int addr, double* >> mergeDist); >> void saoChromaComponentParamDist(SAOParam* saoParam, int addr, >> double* mergeDist); >> >> - inline int estIterOffset(int typeIdx, int classIdx, double lambda, >> int offset, int32_t count, int32_t offsetOrg, >> - int32_t* currentDistortionTableBo, double* >> currentRdCostTableBo); >> + inline int estIterOffset(int typeIdx, double lambda, int offset, >> int32_t count, int32_t offsetOrg, >> + int& currentDistortionTableBo, double& >> currentRdCostTableBo); >> void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus); >> - void rdoSaoUnitRow(SAOParam* saoParam, int idxY); >> +// void rdoSaoUnitRow(SAOParam* saoParam, int idxY); >> void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int >> addr); >> >> void saoStatsInitialOffset(int plane); >> _______________________________________________ >> x265-devel mailing list >> [email protected] >> https://mailman.videolan.org/listinfo/x265-devel >> > > > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
