Results before and after converting the SAO RDO cost calculation from floating point (double) to fixed-point integer arithmetic. *Before (crowd_run, medium)* encoded 500 frames in 76.15s (6.57 fps), 9685.07 kb/s, Avg QP:37.80, Global PSNR : 31.045, SSIM Mean Y: 0.8417967 ( 8.008 dB) *After float to fixed* encoded 500 frames in 75.17s (6.65 fps), 9689.38 kb/s, Avg QP:37.79, Global PSNR : 31.045, SSIM Mean Y: 0.8418636 ( 8.010 dB)
*Before(crowd_run, veryslow)* encoded 500 frames in 904.58s (0.55 fps), 9500.28 kb/s, Avg QP:37.78, Global PSNR: 31.504, SSIM Mean Y: 0.8496374 ( 8.229 dB) *After float to fixed* encoded 500 frames in 946.15s (0.53 fps), 9504.15 kb/s, Avg QP:37.79, Global PSNR: 31.503, SSIM Mean Y: 0.8496565 ( 8.229 dB) *Before(crowd_run, veryslow, --bitrate 7000)* encoded 500 frames in 784.06s (0.64 fps), 6944.36 kb/s, Avg QP:39.73, Global PSNR: 30.432, SSIM Mean Y: 0.8161498 ( 7.355 dB) *After float to fixed* encoded 500 frames in 765.03s (0.65 fps), 6944.96 kb/s, Avg QP:39.73, Global PSNR: 30.432, SSIM Mean Y: 0.8164274 ( 7.362 dB) *Before(crowd_run, medium, --bitrate 7000)* encoded 500 frames in 69.49s (7.20 fps), 6947.67 kb/s, Avg QP:39.77, Global PSNR : 29.970, SSIM Mean Y: 0.8073355 ( 7.152 dB) *After float to fixed* encoded 500 frames in 70.02s (7.14 fps), 6948.83 kb/s, Avg QP:39.76, Global PSNR : 29.972, SSIM Mean Y: 0.8075512 ( 7.157 dB) *Before(crowd_run, medium, 2160p, --bitrate 7000)* encoded 500 frames in 258.98s (1.93 fps), 6963.32 kb/s, Avg QP:45.10, Global PSNR: 30.257, SSIM Mean Y: 0.7517402 ( 6.051 dB) *After float to fixed* encoded 500 frames in 264.31s (1.89 fps), 6962.31 kb/s, Avg QP:45.10, Global PSNR: 30.257, SSIM Mean Y: 0.7517543 ( 6.051 dB) On Wed, Apr 6, 2016 at 1:13 PM, <[email protected]> wrote: > # HG changeset patch > # User Ashok Kumar Mishra<[email protected]> > # Date 1459928537 -19800 > # Wed Apr 06 13:12:17 2016 +0530 > # Node ID 511241d3ee7d6d53a999d46881e7921a601fa0e9 > # Parent 33ff2e5f6eb7c8cf4f3edaa265762f32aa9b6f0f > [OUTPUT CHANGED]SAO: convert sao rdo cost calculation from float to int > > diff -r 33ff2e5f6eb7 -r 511241d3ee7d source/encoder/sao.cpp > --- a/source/encoder/sao.cpp Tue Mar 29 13:10:49 2016 +0530 > +++ b/source/encoder/sao.cpp Wed Apr 06 13:12:17 2016 +0530 > @@ -53,7 +53,7 @@ > return r; > } > > -inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg) > +inline int64_t estSaoDist(int32_t count, int32_t 
offset, int32_t > offsetOrg) > { > return (count * offset - offsetOrg * 2) * offset; > } > @@ -1193,17 +1193,15 @@ > const CUData* cu = m_frame->m_encData->getPicCTU(addr); > int qp = cu->m_qp[0]; > > - double lambda[2] = {0.0}; > - > + int64_t lambda[2] = { 0 }; > int qpCb = qp; > if (m_param->internalCsp == X265_CSP_I420) > qpCb = x265_clip3(QP_MIN, QP_MAX_MAX, (int)g_chromaScale[qp + > slice->m_pps->chromaQpOffset[0]]); > else > qpCb = X265_MIN(qp + slice->m_pps->chromaQpOffset[0], > QP_MAX_SPEC); > > - lambda[0] = x265_lambda2_tab[qp]; > - lambda[1] = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma > - > + lambda[0] = (int64_t)floor(256.0 * x265_lambda2_tab[qp]); > + lambda[1] = (int64_t)floor(256.0 * x265_lambda2_tab[qpCb]); // Use Cb > QP for SAO chroma > const bool allowMerge[2] = {(idxX != 0), (rowBaseAddr != 0)}; // > left, up > > const int addrMerge[2] = {(idxX ? addr - 1 : -1), (rowBaseAddr ? addr > - m_numCuInWidth : -1)};// left, up > @@ -1249,8 +1247,8 @@ > m_entropyCoder.codeSaoMerge(0); > m_entropyCoder.store(m_rdContexts.temp); > > - double mergeDist[NUM_MERGE_MODE] = { 0.0 }; > - double bestCost = 0.0; > + int64_t mergeDist[NUM_MERGE_MODE] = { 0 }; > + int64_t bestCost = 0; > > // Estimate distortion and cost of new SAO params > saoLumaComponentParamDist(saoParam, addr, mergeDist, lambda, > bestCost); > @@ -1279,8 +1277,7 @@ > estDist += > estSaoDist(m_count[plane][typeIdx][classIdx + bandPos], mergeOffset, > m_offsetOrg[plane][typeIdx][classIdx + bandPos]); > } > } > - > - mergeDist[mergeIdx + 1] += ((double)estDist / > lambda[!!plane]); > + mergeDist[mergeIdx + 1] += (estDist / (lambda[!!plane] >> > 8)); > } > > m_entropyCoder.load(m_rdContexts.cur); > @@ -1290,8 +1287,9 @@ > if (allowMerge[1] && (mergeIdx == 1)) > m_entropyCoder.codeSaoMerge(1); > > - int32_t rate = m_entropyCoder.getNumberOfWrittenBits(); > - double mergeCost = mergeDist[mergeIdx + 1] + (double)rate; > + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); > + int64_t 
mergeCost = mergeDist[mergeIdx + 1] + rate; > + // Compare merge cost with best offset cost > if (mergeCost < bestCost) > { > SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : > SAO_MERGE_LEFT; > @@ -1338,7 +1336,7 @@ > { > for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; > classIdx++) > { > - int32_t count = m_count[plane][typeIdx][classIdx]; > + int32_t& count = m_count[plane][typeIdx][classIdx]; > int32_t& offsetOrg = > m_offsetOrg[plane][typeIdx][classIdx]; > int32_t& offsetOut = m_offset[plane][typeIdx][classIdx]; > > @@ -1361,7 +1359,7 @@ > { > for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++) > { > - int32_t count = m_count[plane][SAO_BO][classIdx]; > + int32_t& count = m_count[plane][SAO_BO][classIdx]; > int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx]; > int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx]; > > @@ -1374,14 +1372,28 @@ > } > } > > -void SAO::estIterOffset(int typeIdx, double lambda, int32_t count, > int32_t offsetOrg, int& offset, int& distClasses, double& costClasses) > +inline int64_t SAO::calcSaoRdoCost(int64_t distortion, uint32_t bits, > int64_t lambda) > +{ > +#if X265_DEPTH < 10 > + X265_CHECK(bits <= (INT64_MAX - 128) / lambda, > + "calcRdCost wrap detected dist: %u, bits %u, lambda: " > X265_LL "\n", > + distortion, bits, lambda); > +#else > + X265_CHECK(bits <= (INT64_MAX - 128) / lambda2, > + "calcRdCost wrap detected dist: " X265_LL ", bits %u, > lambda: " X265_LL "\n", > + distortion, bits, lambda); > +#endif > + return distortion + ((bits * lambda + 128) >> 8); > +} > + > +void SAO::estIterOffset(int typeIdx, int64_t lambda, int32_t count, > int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t& > costClasses) > { > int bestOffset = 0; > distClasses = 0; > > // Assuming sending quantized value 0 results in zero offset and > sending the value zero needs 1 bit. > // entropy coder can be used to measure the exact rate here. 
> - double bestCost = lambda; > + int64_t bestCost = calcSaoRdoCost(0, 1, lambda); > while (offset != 0) > { > // Calculate the bits required for signalling the offset > @@ -1391,7 +1403,7 @@ > > // Do the dequntization before distorion calculation > int64_t dist = estSaoDist(count, offset << SAO_BIT_INC, > offsetOrg); > - double cost = ((double)dist + lambda * (double)rate); > + int64_t cost = calcSaoRdoCost(dist, rate, lambda); > if (cost < bestCost) > { > bestCost = cost; > @@ -1405,30 +1417,30 @@ > offset = bestOffset; > } > > -void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr, double* > mergeDist, double* lambda, double &bestCost) > +void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int32_t addr, > int64_t* mergeDist, int64_t* lambda, int64_t &bestCost) > { > int64_t bestDist = 0; > int bestTypeIdx = -1; > > SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr]; > > - int distClasses[MAX_NUM_SAO_CLASS]; > - double costClasses[MAX_NUM_SAO_CLASS]; > + int32_t distClasses[MAX_NUM_SAO_CLASS]; > + int64_t costClasses[MAX_NUM_SAO_CLASS]; > > // RDO SAO_NA > m_entropyCoder.load(m_rdContexts.temp); > m_entropyCoder.resetBits(); > m_entropyCoder.codeSaoType(0); > + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); > + int64_t costPartBest = calcSaoRdoCost(0, rate, lambda[0]); > > - double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * > lambda[0]; > - > - //EO distortion calculation > + // RDO SAO_EO > for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++) > { > int64_t estDist = 0; > for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++) > { > - int32_t count = m_count[0][typeIdx][classIdx]; > + int32_t& count = m_count[0][typeIdx][classIdx]; > int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx]; > int32_t& offsetOut = m_offset[0][typeIdx][classIdx]; > > @@ -1441,13 +1453,12 @@ > m_entropyCoder.load(m_rdContexts.temp); > m_entropyCoder.resetBits(); > m_entropyCoder.codeSaoOffsetEO(m_offset[0][typeIdx] + 
1, typeIdx, > 0); > + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); > + int64_t cost = calcSaoRdoCost(estDist, rate, lambda[0]); > > - uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); > - double cost = (double)estDist + lambda[0] * (double)estRate; > - > - if (cost < dCostPartBest) > + if (cost < costPartBest) > { > - dCostPartBest = cost; > + costPartBest = cost; > bestDist = estDist; > bestTypeIdx = typeIdx; > } > @@ -1459,14 +1470,14 @@ > lclCtuParam->typeIdx = bestTypeIdx; > lclCtuParam->bandPos = 0; > for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) > - lclCtuParam->offset[classIdx] = > (int)m_offset[0][bestTypeIdx][classIdx + 1]; > + lclCtuParam->offset[classIdx] = > m_offset[0][bestTypeIdx][classIdx + 1]; > } > > - //BO RDO > + // RDO SAO_BO > int64_t estDist = 0; > for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++) > { > - int32_t count = m_count[0][SAO_BO][classIdx]; > + int32_t& count = m_count[0][SAO_BO][classIdx]; > int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx]; > int32_t& offsetOut = m_offset[0][SAO_BO][classIdx]; > > @@ -1474,12 +1485,12 @@ > } > > // Estimate Best Position > - double bestRDCostBO = MAX_DOUBLE; > - int bestClassBO = 0; > + int64_t bestRDCostBO = MAX_INT64; > + int32_t bestClassBO = 0; > > for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++) > { > - double currentRDCost = 0.0; > + int64_t currentRDCost = 0; > for (int j = i; j < i + SAO_NUM_OFFSET; j++) > currentRDCost += costClasses[j]; > > @@ -1494,52 +1505,54 @@ > for (int classIdx = bestClassBO; classIdx < bestClassBO + > SAO_NUM_OFFSET; classIdx++) > estDist += distClasses[classIdx]; > > + // Estimate best BO cost > m_entropyCoder.load(m_rdContexts.temp); > m_entropyCoder.resetBits(); > m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + bestClassBO, > bestClassBO, 0); > + uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); > + int64_t cost = calcSaoRdoCost(estDist, estRate, lambda[0]); > > - uint32_t 
estRate = m_entropyCoder.getNumberOfWrittenBits(); > - double cost = (double)estDist + lambda[0] * (double)estRate; > - > - if (cost < dCostPartBest) > + if (cost < costPartBest) > { > - dCostPartBest = cost; > + costPartBest = cost; > bestDist = estDist; > > lclCtuParam->mergeMode = SAO_MERGE_NONE; > lclCtuParam->typeIdx = SAO_BO; > lclCtuParam->bandPos = bestClassBO; > for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) > - lclCtuParam->offset[classIdx] = > (int)m_offset[0][SAO_BO][classIdx + bestClassBO]; > + lclCtuParam->offset[classIdx] = m_offset[0][SAO_BO][classIdx > + bestClassBO]; > } > > - mergeDist[0] = ((double)bestDist / lambda[0]); > + mergeDist[0] = bestDist / (lambda[0] >> 8); > + > m_entropyCoder.load(m_rdContexts.temp); > m_entropyCoder.codeSaoOffset(*lclCtuParam, 0); > m_entropyCoder.store(m_rdContexts.temp); > > - uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); > - bestCost = mergeDist[0] + (double)rate; > + uint32_t bits = m_entropyCoder.getNumberOfWrittenBits(); > + bestCost = mergeDist[0] + bits; > } > > -void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int addr, > double* mergeDist, double* lambda, double &bestCost) > +void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int32_t addr, > int64_t* mergeDist, int64_t* lambda, int64_t &bestCost) > { > int64_t bestDist = 0; > int bestTypeIdx = -1; > > SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], > &saoParam->ctuParam[2][addr] }; > > - double costClasses[MAX_NUM_SAO_CLASS]; > - int distClasses[MAX_NUM_SAO_CLASS]; > - int bestClassBO[2] = { 0, 0 }; > + int64_t costClasses[MAX_NUM_SAO_CLASS]; > + int32_t distClasses[MAX_NUM_SAO_CLASS]; > + int32_t bestClassBO[2] = { 0, 0 }; > > + // RDO SAO_NA > m_entropyCoder.load(m_rdContexts.temp); > m_entropyCoder.resetBits(); > m_entropyCoder.codeSaoType(0); > + uint32_t bits = m_entropyCoder.getNumberOfWrittenBits(); > + int64_t costPartBest = calcSaoRdoCost(0, bits, lambda[1]); > > - double dCostPartBest = 
m_entropyCoder.getNumberOfWrittenBits() * > lambda[1]; > - > - //EO RDO > + // RDO SAO_EO > for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++) > { > int64_t estDist[2] = {0, 0}; > @@ -1547,7 +1560,7 @@ > { > for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; > classIdx++) > { > - int32_t count = m_count[compIdx][typeIdx][classIdx]; > + int32_t& count = m_count[compIdx][typeIdx][classIdx]; > int32_t& offsetOrg = > m_offsetOrg[compIdx][typeIdx][classIdx]; > int32_t& offsetOut = m_offset[compIdx][typeIdx][classIdx]; > > @@ -1564,11 +1577,11 @@ > m_entropyCoder.codeSaoOffsetEO(m_offset[compIdx + 1][typeIdx] > + 1, typeIdx, compIdx + 1); > > uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); > - double cost = (double)(estDist[0] + estDist[1]) + lambda[1] * > (double)estRate; > + int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), estRate, > lambda[1]); > > - if (cost < dCostPartBest) > + if (cost < costPartBest) > { > - dCostPartBest = cost; > + costPartBest = cost; > bestDist = (estDist[0] + estDist[1]); > bestTypeIdx = typeIdx; > } > @@ -1582,21 +1595,21 @@ > lclCtuParam[compIdx]->typeIdx = bestTypeIdx; > lclCtuParam[compIdx]->bandPos = 0; > for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) > - lclCtuParam[compIdx]->offset[classIdx] = > (int)m_offset[compIdx + 1][bestTypeIdx][classIdx + 1]; > + lclCtuParam[compIdx]->offset[classIdx] = m_offset[compIdx > + 1][bestTypeIdx][classIdx + 1]; > } > } > > - // BO RDO > + // RDO SAO_BO > int64_t estDist[2]; > > // Estimate Best Position > for (int compIdx = 1; compIdx < 3; compIdx++) > { > - double bestRDCostBO = MAX_DOUBLE; > + int64_t bestRDCostBO = MAX_INT64; > > for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++) > { > - int32_t count = m_count[compIdx][SAO_BO][classIdx]; > + int32_t& count = m_count[compIdx][SAO_BO][classIdx]; > int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx]; > int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx]; > > @@ -1605,7 
+1618,7 @@ > > for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++) > { > - double currentRDCost = 0.0; > + int64_t currentRDCost = 0; > for (int j = i; j < i + SAO_NUM_OFFSET; j++) > currentRDCost += costClasses[j]; > > @@ -1628,11 +1641,11 @@ > m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + > bestClassBO[compIdx], bestClassBO[compIdx], compIdx + 1); > > uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); > - double cost = (double)(estDist[0] + estDist[1]) + lambda[1] * > (double)estRate; > + int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), estRate, > lambda[1]); > > - if (cost < dCostPartBest) > + if (cost < costPartBest) > { > - dCostPartBest = cost; > + costPartBest = cost; > bestDist = (estDist[0] + estDist[1]); > > for (int compIdx = 0; compIdx < 2; compIdx++) > @@ -1641,11 +1654,11 @@ > lclCtuParam[compIdx]->typeIdx = SAO_BO; > lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx]; > for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) > - lclCtuParam[compIdx]->offset[classIdx] = > (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]]; > + lclCtuParam[compIdx]->offset[classIdx] = m_offset[compIdx > + 1][SAO_BO][classIdx + bestClassBO[compIdx]]; > } > } > > - mergeDist[0] += ((double)bestDist / lambda[1]); > + mergeDist[0] += (bestDist / (lambda[1] >> 8)); > m_entropyCoder.load(m_rdContexts.temp); > > m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1); > @@ -1653,7 +1666,7 @@ > m_entropyCoder.store(m_rdContexts.temp); > > uint32_t rate = m_entropyCoder.getNumberOfWrittenBits(); > - bestCost = mergeDist[0] + (double)rate; > + bestCost = mergeDist[0] + rate; > } > > // NOTE: must put in namespace X265_NS since we need class SAO > diff -r 33ff2e5f6eb7 -r 511241d3ee7d source/encoder/sao.h > --- a/source/encoder/sao.h Tue Mar 29 13:10:49 2016 +0530 > +++ b/source/encoder/sao.h Wed Apr 06 13:12:17 2016 +0530 > @@ -127,12 +127,14 @@ > void calcSaoStatsCu(int addr, int plane); > void 
calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY); > > - void saoLumaComponentParamDist(SAOParam* saoParam, int addr, double* > mergeDist, double* lambda, double &bestCost); > - void saoChromaComponentParamDist(SAOParam* saoParam, int addr, > double* mergeDist, double* lambda, double &bestCost); > + void saoLumaComponentParamDist(SAOParam* saoParam, int addr, int64_t* > mergeDist, int64_t* lambda, int64_t &bestCost); > + void saoChromaComponentParamDist(SAOParam* saoParam, int addr, > int64_t* mergeDist, int64_t* lambda, int64_t &bestCost); > > - void estIterOffset(int typeIdx, double lambda, int32_t count, int32_t > offsetOrg, int& offset, int& distClasses, double& costClasses); > + void estIterOffset(int typeIdx, int64_t lambda, int32_t count, > int32_t offsetOrg, int32_t& offset, > + int32_t& distClasses, int64_t& costClasses); > void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus); > void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int > addr); > + int64_t calcSaoRdoCost(int64_t distortion, uint32_t bits, int64_t > lambda); > > void saoStatsInitialOffset(bool chroma); > >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
