The reason for the output change is quite obvious: the lambda value, the cost calculation, and the cost comparison are now all done in fixed point instead of floating point.
On Thu, May 26, 2016 at 5:54 PM, Ashok Kumar Mishra < [email protected]> wrote: > this is a output changing patch. is there any other issue apart from > changing output? > > On Thu, May 26, 2016 at 5:46 PM, Deepthi Nandakumar < > [email protected]> wrote: > >> >> >> On Wed, May 4, 2016 at 7:39 PM, <[email protected]> wrote: >> >>> # HG changeset patch >>> # User Ashok Kumar Mishra<[email protected]> >>> # Date 1462355258 -19800 >>> # Wed May 04 15:17:38 2016 +0530 >>> # Node ID 70a0888d0703a35b0c3c3a57f96931d0767eb470 >>> # Parent 9f27620a948b67498056246b97db72bebac99218 >>> [OUTPUT CHANGED]SAO: convert sao rdo cost calculation from float to int >>> >>> diff -r 9f27620a948b -r 70a0888d0703 source/encoder/sao.cpp >>> --- a/source/encoder/sao.cpp Mon Apr 25 13:39:54 2016 +0530 >>> +++ b/source/encoder/sao.cpp Wed May 04 15:17:38 2016 +0530 >>> @@ -53,7 +53,7 @@ >>> return r; >>> } >>> >>> -inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg) >>> +inline int64_t estSaoDist(int32_t count, int32_t offset, int32_t >>> offsetOrg) >>> { >>> return (count * offset - offsetOrg * 2) * offset; >>> } >>> @@ -1193,7 +1193,7 @@ >>> const CUData* cu = m_frame->m_encData->getPicCTU(addr); >>> int qp = cu->m_qp[0]; >>> >>> - double lambda[2] = {0.0}; >>> + int64_t lambda[2] = { 0 }; >>> >>> int qpCb = qp; >>> if (m_param->internalCsp == X265_CSP_I420) >>> @@ -1201,8 +1201,8 @@ >>> else >>> qpCb = X265_MIN(qp + slice->m_pps->chromaQpOffset[0], >>> QP_MAX_SPEC); >>> >>> - lambda[0] = x265_lambda2_tab[qp]; >>> - lambda[1] = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma >>> + lambda[0] = (int64_t)floor(256.0 * x265_lambda2_tab[qp]); >>> + lambda[1] = (int64_t)floor(256.0 * x265_lambda2_tab[qpCb]); // Use >>> Cb QP for SAO chroma >>> >>> const bool allowMerge[2] = {(idxX != 0), (rowBaseAddr != 0)}; // >>> left, up >>> >>> @@ -1250,8 +1250,8 @@ >>> m_entropyCoder.store(m_rdContexts.temp); >>> >>> // Estimate distortion and cost of new SAO params >>> - double 
bestCost = 0.0; >>> - double rateDist = 0.0; >>> + int64_t bestCost = 0; >>> + int64_t rateDist = 0; >>> // Estimate distortion and cost of new SAO params >>> saoLumaComponentParamDist(saoParam, addr, rateDist, lambda, >>> bestCost); >>> if (chroma) >>> @@ -1265,7 +1265,7 @@ >>> if (!allowMerge[mergeIdx]) >>> continue; >>> >>> - double mergeDist = 0; >>> + int64_t mergeDist = 0; >>> for (int plane = 0; plane < planes; plane++) >>> { >>> int64_t estDist = 0; >>> @@ -1280,7 +1280,7 @@ >>> estDist += >>> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos], mergeOffset, >>> m_offsetOrg[plane][typeIdx][classIdx + bandPos]); >>> } >>> } >>> - mergeDist += ((double)estDist / lambda[!!plane]); >>> + mergeDist += (estDist / (lambda[!!plane] >> 8)); >>> } >>> >>> m_entropyCoder.load(m_rdContexts.cur); >>> @@ -1290,8 +1290,8 @@ >>> if (allowMerge[1] && (mergeIdx == 1)) >>> m_entropyCoder.codeSaoMerge(1); >>> >>> - int32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); >>> - double mergeCost = mergeDist + (double)estRate; >>> + uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); >>> + int64_t mergeCost = mergeDist + estRate; >>> if (mergeCost < bestCost) >>> { >>> SaoMergeMode mergeMode = mergeIdx ? 
SAO_MERGE_UP : >>> SAO_MERGE_LEFT; >>> @@ -1337,7 +1337,7 @@ >>> { >>> for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; >>> classIdx++) >>> { >>> - int32_t count = m_count[plane][typeIdx][classIdx]; >>> + int32_t& count = m_count[plane][typeIdx][classIdx]; >>> int32_t& offsetOrg = >>> m_offsetOrg[plane][typeIdx][classIdx]; >>> int32_t& offsetOut = m_offset[plane][typeIdx][classIdx]; >>> >>> @@ -1360,7 +1360,7 @@ >>> { >>> for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++) >>> { >>> - int32_t count = m_count[plane][SAO_BO][classIdx]; >>> + int32_t& count = m_count[plane][SAO_BO][classIdx]; >>> int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx]; >>> int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx]; >>> >>> @@ -1373,14 +1373,27 @@ >>> } >>> } >>> >>> -void SAO::estIterOffset(int typeIdx, double lambda, int32_t count, >>> int32_t offsetOrg, int& offset, int& distClasses, double& costClasses) >>> +inline int64_t SAO::calcSaoRdoCost(int64_t distortion, uint32_t bits, >>> int64_t lambda) >>> +{ >>> +#if X265_DEPTH < 10 >>> + X265_CHECK(bits <= (INT64_MAX - 128) / lambda, >>> + "calcRdCost wrap detected dist: %u, bits %u, lambda: >>> " X265_LL "\n", >>> + distortion, bits, lambda); >>> +#else >>> + X265_CHECK(bits <= (INT64_MAX - 128) / lambda2, >>> + "calcRdCost wrap detected dist: " X265_LL ", bits >>> %u, lambda: " X265_LL "\n", >>> + distortion, bits, lambda); >>> >> >> Fails smoke tests - did you mean lambda here? >> >> >>> +#endif >>> + return distortion + ((bits * lambda + 128) >> 8); >>> +} >>> +void SAO::estIterOffset(int typeIdx, int64_t lambda, int32_t count, >>> int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t& >>> costClasses) >>> { >>> int bestOffset = 0; >>> distClasses = 0; >>> >>> // Assuming sending quantized value 0 results in zero offset and >>> sending the value zero needs 1 bit. >>> // entropy coder can be used to measure the exact rate here. 
>>> - double bestCost = lambda; >>> + int64_t bestCost = calcSaoRdoCost(0, 1, lambda); >>> while (offset != 0) >>> { >>> // Calculate the bits required for signalling the offset >>> @@ -1390,7 +1403,7 @@ >>> >>> // Do the dequntization before distorion calculation >>> int64_t dist = estSaoDist(count, offset << SAO_BIT_INC, >>> offsetOrg); >>> - double cost = ((double)dist + lambda * (double)rate); >>> + int64_t cost = calcSaoRdoCost(dist, rate, lambda); >>> if (cost < bestCost) >>> { >>> bestCost = cost; >>> @@ -1404,22 +1417,23 @@ >>> offset = bestOffset; >>> } >>> >>> -void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr, >>> double& rateDist, double* lambda, double &bestCost) >>> +void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int32_t addr, >>> int64_t& rateDist, int64_t* lambda, int64_t &bestCost) >>> { >>> int64_t bestDist = 0; >>> int bestTypeIdx = -1; >>> >>> SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr]; >>> >>> - int distClasses[MAX_NUM_SAO_CLASS]; >>> - double costClasses[MAX_NUM_SAO_CLASS]; >>> + int32_t distClasses[MAX_NUM_SAO_CLASS]; >>> + int64_t costClasses[MAX_NUM_SAO_CLASS]; >>> >>> // RDO SAO_NA >>> m_entropyCoder.load(m_rdContexts.temp); >>> m_entropyCoder.resetBits(); >>> m_entropyCoder.codeSaoType(0); >>> >>> - double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * >>> lambda[0]; >>> + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); >>> + int64_t costPartBest = calcSaoRdoCost(0, rate, lambda[0]); >>> >>> //EO distortion calculation >>> for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++) >>> @@ -1427,7 +1441,7 @@ >>> int64_t estDist = 0; >>> for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; >>> classIdx++) >>> { >>> - int32_t count = m_count[0][typeIdx][classIdx]; >>> + int32_t& count = m_count[0][typeIdx][classIdx]; >>> int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx]; >>> int32_t& offsetOut = m_offset[0][typeIdx][classIdx]; >>> >>> @@ -1441,12 +1455,12 @@ >>> 
m_entropyCoder.resetBits(); >>> m_entropyCoder.codeSaoOffsetEO(m_offset[0][typeIdx] + 1, >>> typeIdx, 0); >>> >>> - uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); >>> - double cost = (double)estDist + lambda[0] * (double)estRate; >>> + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); >>> + int64_t cost = calcSaoRdoCost(estDist, rate, lambda[0]); >>> >>> - if (cost < dCostPartBest) >>> + if (cost < costPartBest) >>> { >>> - dCostPartBest = cost; >>> + costPartBest = cost; >>> bestDist = estDist; >>> bestTypeIdx = typeIdx; >>> } >>> @@ -1458,14 +1472,14 @@ >>> lclCtuParam->typeIdx = bestTypeIdx; >>> lclCtuParam->bandPos = 0; >>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) >>> - lclCtuParam->offset[classIdx] = >>> (int)m_offset[0][bestTypeIdx][classIdx + 1]; >>> + lclCtuParam->offset[classIdx] = >>> m_offset[0][bestTypeIdx][classIdx + 1]; >>> } >>> >>> //BO RDO >>> int64_t estDist = 0; >>> for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++) >>> { >>> - int32_t count = m_count[0][SAO_BO][classIdx]; >>> + int32_t& count = m_count[0][SAO_BO][classIdx]; >>> int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx]; >>> int32_t& offsetOut = m_offset[0][SAO_BO][classIdx]; >>> >>> @@ -1473,12 +1487,12 @@ >>> } >>> >>> // Estimate Best Position >>> - double bestRDCostBO = MAX_DOUBLE; >>> - int bestClassBO = 0; >>> + int64_t bestRDCostBO = MAX_INT64; >>> + int32_t bestClassBO = 0; >>> >>> for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++) >>> { >>> - double currentRDCost = 0.0; >>> + int64_t currentRDCost = 0; >>> for (int j = i; j < i + SAO_NUM_OFFSET; j++) >>> currentRDCost += costClasses[j]; >>> >>> @@ -1498,21 +1512,21 @@ >>> m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + bestClassBO, >>> bestClassBO, 0); >>> >>> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); >>> - double cost = (double)estDist + lambda[0] * (double)estRate; >>> + int64_t cost = calcSaoRdoCost(estDist, estRate, lambda[0]); >>> 
>>> - if (cost < dCostPartBest) >>> + if (cost < costPartBest) >>> { >>> - dCostPartBest = cost; >>> + costPartBest = cost; >>> bestDist = estDist; >>> >>> lclCtuParam->mergeMode = SAO_MERGE_NONE; >>> lclCtuParam->typeIdx = SAO_BO; >>> lclCtuParam->bandPos = bestClassBO; >>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) >>> - lclCtuParam->offset[classIdx] = >>> (int)m_offset[0][SAO_BO][classIdx + bestClassBO]; >>> + lclCtuParam->offset[classIdx] = >>> m_offset[0][SAO_BO][classIdx + bestClassBO]; >>> } >>> >>> - rateDist = ((double)bestDist / lambda[0]); >>> + rateDist = bestDist / (lambda[0] >> 8); >>> m_entropyCoder.load(m_rdContexts.temp); >>> m_entropyCoder.codeSaoOffset(*lclCtuParam, 0); >>> m_entropyCoder.store(m_rdContexts.temp); >>> @@ -1520,26 +1534,27 @@ >>> if (m_param->internalCsp == X265_CSP_I400) >>> { >>> uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); >>> - bestCost = rateDist + (double)rate; >>> + bestCost = rateDist + rate; >>> } >>> } >>> >>> -void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int addr, >>> double& rateDist, double* lambda, double &bestCost) >>> +void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int32_t addr, >>> int64_t& rateDist, int64_t* lambda, int64_t &bestCost) >>> { >>> int64_t bestDist = 0; >>> int bestTypeIdx = -1; >>> >>> SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], >>> &saoParam->ctuParam[2][addr] }; >>> >>> - double costClasses[MAX_NUM_SAO_CLASS]; >>> - int distClasses[MAX_NUM_SAO_CLASS]; >>> - int bestClassBO[2] = { 0, 0 }; >>> + int64_t costClasses[MAX_NUM_SAO_CLASS]; >>> + int32_t distClasses[MAX_NUM_SAO_CLASS]; >>> + int32_t bestClassBO[2] = { 0, 0 }; >>> >>> m_entropyCoder.load(m_rdContexts.temp); >>> m_entropyCoder.resetBits(); >>> m_entropyCoder.codeSaoType(0); >>> >>> - double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * >>> lambda[1]; >>> + uint32_t bits = m_entropyCoder.getNumberOfWrittenBits(); >>> + int64_t costPartBest = 
calcSaoRdoCost(0, bits, lambda[1]); >>> >>> //EO RDO >>> for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++) >>> @@ -1549,7 +1564,7 @@ >>> { >>> for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; >>> classIdx++) >>> { >>> - int32_t count = m_count[compIdx][typeIdx][classIdx]; >>> + int32_t& count = m_count[compIdx][typeIdx][classIdx]; >>> int32_t& offsetOrg = >>> m_offsetOrg[compIdx][typeIdx][classIdx]; >>> int32_t& offsetOut = >>> m_offset[compIdx][typeIdx][classIdx]; >>> >>> @@ -1566,11 +1581,11 @@ >>> m_entropyCoder.codeSaoOffsetEO(m_offset[compIdx + >>> 1][typeIdx] + 1, typeIdx, compIdx + 1); >>> >>> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); >>> - double cost = (double)(estDist[0] + estDist[1]) + lambda[1] * >>> (double)estRate; >>> + int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), >>> estRate, lambda[1]); >>> >>> - if (cost < dCostPartBest) >>> + if (cost < costPartBest) >>> { >>> - dCostPartBest = cost; >>> + costPartBest = cost; >>> bestDist = (estDist[0] + estDist[1]); >>> bestTypeIdx = typeIdx; >>> } >>> @@ -1584,7 +1599,7 @@ >>> lclCtuParam[compIdx]->typeIdx = bestTypeIdx; >>> lclCtuParam[compIdx]->bandPos = 0; >>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; >>> classIdx++) >>> - lclCtuParam[compIdx]->offset[classIdx] = >>> (int)m_offset[compIdx + 1][bestTypeIdx][classIdx + 1]; >>> + lclCtuParam[compIdx]->offset[classIdx] = >>> m_offset[compIdx + 1][bestTypeIdx][classIdx + 1]; >>> } >>> } >>> >>> @@ -1594,11 +1609,11 @@ >>> // Estimate Best Position >>> for (int compIdx = 1; compIdx < 3; compIdx++) >>> { >>> - double bestRDCostBO = MAX_DOUBLE; >>> + int64_t bestRDCostBO = MAX_INT64; >>> >>> for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++) >>> { >>> - int32_t count = m_count[compIdx][SAO_BO][classIdx]; >>> + int32_t& count = m_count[compIdx][SAO_BO][classIdx]; >>> int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx]; >>> int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx]; >>> >>> 
@@ -1607,7 +1622,7 @@ >>> >>> for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++) >>> { >>> - double currentRDCost = 0.0; >>> + int64_t currentRDCost = 0; >>> for (int j = i; j < i + SAO_NUM_OFFSET; j++) >>> currentRDCost += costClasses[j]; >>> >>> @@ -1630,11 +1645,11 @@ >>> m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + >>> bestClassBO[compIdx], bestClassBO[compIdx], compIdx + 1); >>> >>> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); >>> - double cost = (double)(estDist[0] + estDist[1]) + lambda[1] * >>> (double)estRate; >>> + int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), estRate, >>> lambda[1]); >>> >>> - if (cost < dCostPartBest) >>> + if (cost < costPartBest) >>> { >>> - dCostPartBest = cost; >>> + costPartBest = cost; >>> bestDist = (estDist[0] + estDist[1]); >>> >>> for (int compIdx = 0; compIdx < 2; compIdx++) >>> @@ -1643,11 +1658,11 @@ >>> lclCtuParam[compIdx]->typeIdx = SAO_BO; >>> lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx]; >>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; >>> classIdx++) >>> - lclCtuParam[compIdx]->offset[classIdx] = >>> (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]]; >>> + lclCtuParam[compIdx]->offset[classIdx] = >>> m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]]; >>> } >>> } >>> >>> - rateDist += ((double)bestDist / lambda[1]); >>> + rateDist += (bestDist / (lambda[1] >> 8)); >>> m_entropyCoder.load(m_rdContexts.temp); >>> >>> if (saoParam->bSaoFlag[1]) >>> @@ -1657,12 +1672,12 @@ >>> m_entropyCoder.store(m_rdContexts.temp); >>> >>> uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); >>> - bestCost = rateDist + (double)rate; >>> + bestCost = rateDist + rate; >>> } >>> else >>> { >>> uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); >>> - bestCost = rateDist + (double)rate; >>> + bestCost = rateDist + rate; >>> } >>> } >>> >>> diff -r 9f27620a948b -r 70a0888d0703 source/encoder/sao.h >>> --- a/source/encoder/sao.h Mon 
Apr 25 13:39:54 2016 +0530 >>> +++ b/source/encoder/sao.h Wed May 04 15:17:38 2016 +0530 >>> @@ -126,12 +126,13 @@ >>> void calcSaoStatsCu(int addr, int plane); >>> void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY); >>> >>> - void saoLumaComponentParamDist(SAOParam* saoParam, int addr, >>> double& rateDist, double* lambda, double &bestCost); >>> - void saoChromaComponentParamDist(SAOParam* saoParam, int addr, >>> double& rateDist, double* lambda, double &bestCost); >>> + void saoLumaComponentParamDist(SAOParam* saoParam, int addr, >>> int64_t& rateDist, int64_t* lambda, int64_t& bestCost); >>> + void saoChromaComponentParamDist(SAOParam* saoParam, int addr, >>> int64_t& rateDist, int64_t* lambda, int64_t& bestCost); >>> >>> - void estIterOffset(int typeIdx, double lambda, int32_t count, >>> int32_t offsetOrg, int& offset, int& distClasses, double& costClasses); >>> + void estIterOffset(int typeIdx, int64_t lambda, int32_t count, >>> int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t& >>> costClasses); >>> void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus); >>> void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, >>> int addr); >>> + int64_t calcSaoRdoCost(int64_t distortion, uint32_t bits, int64_t >>> lambda); >>> >>> void saoStatsInitialOffset(int planes); >>> >>> _______________________________________________ >>> x265-devel mailing list >>> [email protected] >>> https://mailman.videolan.org/listinfo/x265-devel >>> >> >> >> >> -- >> Deepthi Nandakumar >> Engineering Manager, x265 >> Multicoreware, Inc >> >> _______________________________________________ >> x265-devel mailing list >> [email protected] >> https://mailman.videolan.org/listinfo/x265-devel >> >> >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
