I have added some information; please review it. On Thu, Aug 20, 2015 at 11:53 AM, Sagar Kotecha <sa...@multicorewareinc.com> wrote:
> > > On Thu, Aug 20, 2015 at 8:47 AM, Steve Borho <st...@borho.org> wrote: > >> On 08/19, sa...@multicorewareinc.com wrote: >> > # HG changeset patch >> > # User Sagar Kotecha <sa...@multicorewareinc.com> >> > # Date 1439978360 -19800 >> > # Node ID 8878f03570b2a8d0207f5e96bae3900de5653ec0 >> > # Parent 2980141a744a569ad6f60dbebdece76a4eababfd >> > Add emergency denoising when frame qp > QP_MAX_SPEC >> > >> > This feature is ported from x264, and is turned on for VBV encodes >> >> this needs to be documented in the reST docs somewhere, since our >> emergency denoise works differently than in x264 >> >> > OK, I Will send it in separate patch. > > > >> > diff -r 2980141a744a -r 8878f03570b2 source/common/common.h >> > --- a/source/common/common.h Tue Aug 18 12:45:52 2015 +0530 >> > +++ b/source/common/common.h Wed Aug 19 15:29:20 2015 +0530 >> > @@ -317,6 +317,9 @@ >> > #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420) >> > #define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8 >> > >> > +#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // >> Maximum number of transform coefficients, for a 32x32 transform >> > +#define MAX_NUM_TR_CATEGORIES 16 // 32, >> 16, 8, 4 transform categories each for luma and chroma >> > + >> > namespace X265_NS { >> > >> > enum { SAO_NUM_OFFSET = 4 }; >> > diff -r 2980141a744a -r 8878f03570b2 source/common/quant.cpp >> > --- a/source/common/quant.cpp Tue Aug 18 12:45:52 2015 +0530 >> > +++ b/source/common/quant.cpp Wed Aug 19 15:29:20 2015 +0530 >> > @@ -444,12 +444,12 @@ >> > primitives.cu[sizeIdx].dct(m_fencShortBuf, >> m_fencDctCoeff, trSize); >> > } >> > >> > - if (m_nr) >> > + if (m_nr && m_nr->offset) >> > { >> > /* denoise is not applied to intra residual, so DST can be >> ignored */ >> > int cat = sizeIdx + 4 * !isLuma + 8 * !isIntra; >> > int numCoeff = 1 << (log2TrSize * 2); >> > - primitives.denoiseDct(m_resiDctCoeff, >> m_nr->residualSum[cat], m_nr->offsetDenoise[cat], numCoeff); >> > + primitives.denoiseDct(m_resiDctCoeff, >> 
m_nr->residualSum[cat], m_nr->offset[cat], numCoeff); >> > m_nr->count[cat]++; >> > } >> > } >> > diff -r 2980141a744a -r 8878f03570b2 source/common/quant.h >> > --- a/source/common/quant.h Tue Aug 18 12:45:52 2015 +0530 >> > +++ b/source/common/quant.h Wed Aug 19 15:29:20 2015 +0530 >> > @@ -59,18 +59,19 @@ >> > } >> > }; >> > >> > -#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE /* Maximum >> number of transform coefficients, for a 32x32 transform */ >> > -#define MAX_NUM_TR_CATEGORIES 16 /* 32, 16, >> 8, 4 transform categories each for luma and chroma */ >> > - >> > // NOTE: MUST be 16-byte aligned for asm code >> > struct NoiseReduction >> > { >> > /* 0 = luma 4x4, 1 = luma 8x8, 2 = luma 16x16, 3 = luma 32x32 >> > * 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma >> 32x32 >> > * Intra 0..7 - Inter 8..15 */ >> > - uint16_t offsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]; >> > - uint32_t residualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]; >> > - uint32_t count[MAX_NUM_TR_CATEGORIES]; >> > + uint16_t nrOffsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]; >> > + uint32_t nrResidualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]; >> > + uint32_t nrCount[MAX_NUM_TR_CATEGORIES]; >> > + >> > + ALIGN_VAR_16(uint16_t, (*offset)[MAX_NUM_TR_COEFFS]); >> > + uint32_t(*residualSum)[MAX_NUM_TR_COEFFS]; >> > + uint32_t *count; >> > }; >> > >> > class Quant >> > diff -r 2980141a744a -r 8878f03570b2 source/encoder/encoder.cpp >> > --- a/source/encoder/encoder.cpp Tue Aug 18 12:45:52 2015 +0530 >> > +++ b/source/encoder/encoder.cpp Wed Aug 19 15:29:20 2015 +0530 >> > @@ -68,6 +68,7 @@ >> > m_latestParam = NULL; >> > m_threadPool = NULL; >> > m_analysisFile = NULL; >> > + m_offsetEmergency = NULL; >> > for (int i = 0; i < X265_MAX_FRAME_THREADS; i++) >> > m_frameEncoder[i] = NULL; >> > >> > @@ -187,6 +188,7 @@ >> > { >> > x265_log(m_param, X265_LOG_ERROR, "Unable to allocate scaling >> list arrays\n"); >> > m_aborted = true; >> > + return; >> > } >> > 
else if (!m_param->scalingLists || !strcmp(m_param->scalingLists, >> "off")) >> > m_scalingList.m_bEnabled = false; >> > @@ -194,7 +196,6 @@ >> > m_scalingList.setDefaultScalingList(); >> > else if (m_scalingList.parseScalingList(m_param->scalingLists)) >> > m_aborted = true; >> > - m_scalingList.setupQuantMatrices(); >> > >> > m_lookahead = new Lookahead(m_param, m_threadPool); >> > if (m_numPools) >> > @@ -221,6 +222,83 @@ >> > } >> > } >> > >> > + if (m_param->rc.vbvBufferSize) >> > + { >> > + m_offsetEmergency = >> (uint16_t(*)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS])X265_MALLOC(uint16_t, >> MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS * (QP_MAX_MAX - QP_MAX_SPEC)); >> > + if (!m_offsetEmergency) >> > + { >> > + x265_log(m_param, X265_LOG_ERROR, "Unable to allocate >> memory\n"); >> > + m_aborted = true; >> > + return; >> > + } >> > + >> > + bool scalingEnabled = m_scalingList.m_bEnabled; >> > + if (!scalingEnabled) >> > + { >> > + m_scalingList.setDefaultScalingList(); >> > + m_scalingList.setupQuantMatrices(); >> > + } >> > + else >> > + m_scalingList.setupQuantMatrices(); >> > + >> > + for (int q = 0; q < QP_MAX_MAX - QP_MAX_SPEC; q++) >> > + { >> > + for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++) >> > + { >> > + uint16_t *nrOffset = m_offsetEmergency[q][cat]; >> > + >> > + int trSize = cat & 3; >> > + >> > + int coefCount = 1 << ((trSize + 2) * 2); >> > + >> > + /* Denoise chroma first then luma, then DC. */ >> > + int dcThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3; >> > + int lumaThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3; >> > + int chromaThreshold = 0; >> > + >> > + int thresh = (cat < 4 || (cat >= 8 && cat < 12)) ? 
>> lumaThreshold : chromaThreshold; >> > + >> > + double quantF = (double)(1ULL << (q / 6 + 16 + 8)); >> > + >> > + for (int i = 0; i < coefCount; i++) >> > + { >> > + uint16_t max = (1 << (7 + X265_DEPTH)) - 1; >> > + /* True "emergency mode": remove all DCT >> coefficients */ >> > + if (q == QP_MAX_MAX - QP_MAX_SPEC - 1) >> > + { >> > + nrOffset[i] = max; >> > + continue; >> > + } >> > + >> > + int iThresh = i == 0 ? dcThreshold : thresh; >> > + if (q < iThresh) >> > + { >> > + nrOffset[i] = 0; >> > + continue; >> > + } >> > + >> > + int numList = (cat >= 8) * 3 + ((int)!iThresh); >> > + >> > + double pos = (double)(q - iThresh + 1) / >> (QP_MAX_MAX - QP_MAX_SPEC - iThresh); >> > + double start = quantF / >> (m_scalingList.m_quantCoef[trSize][numList][QP_MAX_SPEC % 6][i]); >> > + >> > + // Formula chosen as an exponential scale to >> vaguely mimic the effects of a higher quantizer. >> > + double bias = (pow(2, pos * (QP_MAX_MAX - >> QP_MAX_SPEC)) * 0.003 - 0.003) * start; >> > + nrOffset[i] = (uint16_t)X265_MIN(bias + 0.5, max); >> > + } >> > + } >> > + } >> > + >> > + if (!scalingEnabled) >> > + { >> > + m_scalingList.m_bEnabled = false; >> > + m_scalingList.m_bDataPresent = false; >> > + m_scalingList.setupQuantMatrices(); >> > + } >> > + } >> > + else >> > + m_scalingList.setupQuantMatrices(); >> > + >> > for (int i = 0; i < m_param->frameNumThreads; i++) >> > { >> > m_frameEncoder[i]->start(); >> > @@ -314,6 +392,8 @@ >> > delete m_rateControl; >> > } >> > >> > + X265_FREE(m_offsetEmergency); >> > + >> > if (m_analysisFile) >> > fclose(m_analysisFile); >> > >> > diff -r 2980141a744a -r 8878f03570b2 source/encoder/encoder.h >> > --- a/source/encoder/encoder.h Tue Aug 18 12:45:52 2015 +0530 >> > +++ b/source/encoder/encoder.h Wed Aug 19 15:29:20 2015 +0530 >> > @@ -130,6 +130,10 @@ >> > bool m_aborted; // fatal error detected >> > bool m_reconfigured; // reconfigure of encoder >> detected >> > >> > + uint16_t >> 
(*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]; >> > + ALIGN_VAR_32(uint32_t, >> m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]); >> > + uint32_t m_countEmergency[MAX_NUM_TR_CATEGORIES]; >> > + >> > Encoder(); >> > ~Encoder() {} >> > >> > diff -r 2980141a744a -r 8878f03570b2 source/encoder/frameencoder.cpp >> > --- a/source/encoder/frameencoder.cpp Tue Aug 18 12:45:52 2015 +0530 >> > +++ b/source/encoder/frameencoder.cpp Wed Aug 19 15:29:20 2015 +0530 >> > @@ -135,7 +135,7 @@ >> > ok &= m_rce.picTimingSEI && m_rce.hrdTiming; >> > } >> > >> > - if (m_param->noiseReductionIntra || m_param->noiseReductionInter) >> > + if (m_param->noiseReductionIntra || m_param->noiseReductionInter >> || m_param->rc.vbvBufferSize) >> > m_nr = X265_MALLOC(NoiseReduction, 1); >> > if (m_nr) >> > memset(m_nr, 0, sizeof(NoiseReduction)); >> > @@ -362,11 +362,47 @@ >> > } >> > } >> > >> > + int numTLD; >> > + if (m_pool) >> > + numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : >> m_pool->m_numWorkers + m_pool->m_numProviders; >> > + else >> > + numTLD = 1; >> > + >> > /* Get the QP for this frame from rate control. 
This call may >> block until >> > * frames ahead of it in encode order have called rateControlEnd() >> */ >> > int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce, >> m_top); >> > m_rce.newQp = qp; >> > >> > + if (m_nr) >> > + { >> > + if (qp > QP_MAX_SPEC && m_frame->m_param->rc.vbvBufferSize) >> > + { >> > + for (int i = 0; i < numTLD; i++) >> > + { >> > + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = >> m_top->m_offsetEmergency[qp - QP_MAX_SPEC - 1]; >> > + >> m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = >> m_top->m_residualSumEmergency; >> > + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = >> m_top->m_countEmergency; >> > + } >> > + } >> > + else >> > + { >> > + if (m_param->noiseReductionIntra || >> m_param->noiseReductionInter) >> > + { >> > + for (int i = 0; i < numTLD; i++) >> > + { >> > + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset >> = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrOffsetDenoise; >> > + >> m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = >> m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrResidualSum; >> > + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count >> = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrCount; >> > + } >> > + } >> > + else >> > + { >> > + for (int i = 0; i < numTLD; i++) >> > + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset >> = NULL; >> > + } >> > + } >> > + } >> > + >> > /* Clip slice QP to 0-51 spec range before encoding */ >> > slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp); >> > >> > @@ -699,37 +735,36 @@ >> > } >> > } >> > >> > - int numTLD; >> > - if (m_pool) >> > - numTLD = m_param->bEnableWavefront ? 
m_pool->m_numWorkers : >> m_pool->m_numWorkers + m_pool->m_numProviders; >> > - else >> > - numTLD = 1; >> > - >> > if (m_nr) >> > { >> > - /* Accumulate NR statistics from all worker threads */ >> > - for (int i = 0; i < numTLD; i++) >> > - { >> > - NoiseReduction* nr = >> &m_tld[i].analysis.m_quant.m_frameNr[m_jpId]; >> > - for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++) >> > - { >> > - for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++) >> > - m_nr->residualSum[cat][coeff] += >> nr->residualSum[cat][coeff]; >> > - >> > - m_nr->count[cat] += nr->count[cat]; >> > - } >> > - } >> > - >> > - noiseReductionUpdate(); >> > - >> > - /* Copy updated NR coefficients back to all worker threads */ >> > - for (int i = 0; i < numTLD; i++) >> > - { >> > - NoiseReduction* nr = >> &m_tld[i].analysis.m_quant.m_frameNr[m_jpId]; >> > - memcpy(nr->offsetDenoise, m_nr->offsetDenoise, >> sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS); >> > - memset(nr->count, 0, sizeof(uint32_t) * >> MAX_NUM_TR_CATEGORIES); >> > - memset(nr->residualSum, 0, sizeof(uint32_t) * >> MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS); >> > - } >> > + bool nrEnabled = (m_rce.newQp < QP_MAX_SPEC || >> !m_param->rc.vbvBufferSize) && (m_param->noiseReductionIntra || >> m_param->noiseReductionInter); >> > + >> > + if (nrEnabled) >> > + { >> > + /* Accumulate NR statistics from all worker threads */ >> > + for (int i = 0; i < numTLD; i++) >> > + { >> > + NoiseReduction* nr = >> &m_tld[i].analysis.m_quant.m_frameNr[m_jpId]; >> > + for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++) >> > + { >> > + for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; >> coeff++) >> > + m_nr->nrResidualSum[cat][coeff] += >> nr->nrResidualSum[cat][coeff]; >> > + >> > + m_nr->nrCount[cat] += nr->nrCount[cat]; >> > + } >> > + } >> > + >> > + noiseReductionUpdate(); >> > + >> > + /* Copy updated NR coefficients back to all worker threads >> */ >> > + for (int i = 0; i < numTLD; i++) >> > + { >> > + NoiseReduction* nr = >> 
&m_tld[i].analysis.m_quant.m_frameNr[m_jpId]; >> > + memcpy(nr->nrOffsetDenoise, m_nr->nrOffsetDenoise, >> sizeof(uint16_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS); >> > + memset(nr->nrCount, 0, sizeof(uint32_t)* >> MAX_NUM_TR_CATEGORIES); >> > + memset(nr->nrResidualSum, 0, sizeof(uint32_t)* >> MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS); >> > + } >> > + } >> > } >> > >> > #if DETAILED_CU_STATS >> > @@ -1253,25 +1288,25 @@ >> > int trSize = cat & 3; >> > int coefCount = 1 << ((trSize + 2) * 2); >> > >> > - if (m_nr->count[cat] > maxBlocksPerTrSize[trSize]) >> > + if (m_nr->nrCount[cat] > maxBlocksPerTrSize[trSize]) >> > { >> > for (int i = 0; i < coefCount; i++) >> > - m_nr->residualSum[cat][i] >>= 1; >> > - m_nr->count[cat] >>= 1; >> > + m_nr->nrResidualSum[cat][i] >>= 1; >> > + m_nr->nrCount[cat] >>= 1; >> > } >> > >> > int nrStrength = cat < 8 ? m_param->noiseReductionIntra : >> m_param->noiseReductionInter; >> > - uint64_t scaledCount = (uint64_t)nrStrength * m_nr->count[cat]; >> > + uint64_t scaledCount = (uint64_t)nrStrength * >> m_nr->nrCount[cat]; >> > >> > for (int i = 0; i < coefCount; i++) >> > { >> > - uint64_t value = scaledCount + m_nr->residualSum[cat][i] / >> 2; >> > - uint64_t denom = m_nr->residualSum[cat][i] + 1; >> > - m_nr->offsetDenoise[cat][i] = (uint16_t)(value / denom); >> > + uint64_t value = scaledCount + m_nr->nrResidualSum[cat][i] >> / 2; >> > + uint64_t denom = m_nr->nrResidualSum[cat][i] + 1; >> > + m_nr->nrOffsetDenoise[cat][i] = (uint16_t)(value / denom); >> > } >> > >> > // Don't denoise DC coefficients >> > - m_nr->offsetDenoise[cat][0] = 0; >> > + m_nr->nrOffsetDenoise[cat][0] = 0; >> > } >> > } >> > >> > diff -r 2980141a744a -r 8878f03570b2 source/encoder/search.cpp >> > --- a/source/encoder/search.cpp Tue Aug 18 12:45:52 2015 +0530 >> > +++ b/source/encoder/search.cpp Wed Aug 19 15:29:20 2015 +0530 >> > @@ -80,7 +80,7 @@ >> > m_me.init(param.searchMethod, param.subpelRefine, >> param.internalCsp); >> > >> > bool ok = 
m_quant.init(param.rdoqLevel, param.psyRdoq, >> scalingList, m_entropyCoder); >> > - if (m_param->noiseReductionIntra || m_param->noiseReductionInter) >> > + if (m_param->noiseReductionIntra || m_param->noiseReductionInter >> || m_param->rc.vbvBufferSize) >> > ok &= m_quant.allocNoiseReduction(param); >> > >> > ok &= Predict::allocBuffers(param.internalCsp); /* sets >> m_hChromaShift & m_vChromaShift */ >> > _______________________________________________ >> > x265-devel mailing list >> > x265-devel@videolan.org >> > https://mailman.videolan.org/listinfo/x265-devel >> >> -- >> Steve Borho >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel >> > > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel