Both patches are the same; you can apply this patch on top of my previous two patches.
On Thu, Sep 28, 2017 at 12:07 PM, Pradeep Ramachandran < [email protected]> wrote: > On Tue, Sep 26, 2017 at 6:49 PM, Ashok Kumar Mishra < > [email protected]> wrote: > >> Please find the attached patch. >> > > This patch is confusing - is this to be applied on top of the previous > patch (which didn't work), or is this a replacement patch (which didn't > work either as I can't find the parent). > > >> >> On Thu, Sep 21, 2017 at 8:21 PM, <[email protected]> wrote: >> >>> # HG changeset patch >>> # User Ashok Kumar Mishra <[email protected]> >>> # Date 1506005452 -19800 >>> # Thu Sep 21 20:20:52 2017 +0530 >>> # Node ID 546387e0b983ac1d68cda73777b34a122928cd32 >>> # Parent 71f700844b0b2a9120bfd8a2d1f13e219aa20677 >>> vbv hanging issue; fix for multiple slices >>> When multiple slices are enabled, vbv rate control must take care of >>> correct rows in slices, since multiple slices are encoding >>> simultaneously. >>> >>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.cpp >>> --- a/source/encoder/frameencoder.cpp Tue Sep 12 18:13:03 2017 +0530 >>> +++ b/source/encoder/frameencoder.cpp Thu Sep 21 20:20:52 2017 +0530 >>> @@ -88,6 +88,7 @@ >>> delete[] m_outStreams; >>> delete[] m_backupStreams; >>> X265_FREE(m_sliceBaseRow); >>> + X265_FREE(m_sliceMaxBlockRow); >>> X265_FREE(m_cuGeoms); >>> X265_FREE(m_ctuGeomMap); >>> X265_FREE(m_substreamSizes); >>> @@ -118,6 +119,40 @@ >>> >>> m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); >>> ok &= !!m_sliceBaseRow; >>> + m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / >>> m_param->maxSlices; >>> + uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices; >>> + uint32_t rowSum = sliceGroupSizeAccu; >>> + uint32_t sidx = 0; >>> + for (uint32_t i = 0; i < m_numRows; i++) >>> + { >>> + const uint32_t rowRange = (rowSum >> 8); >>> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) >>> + { >>> + rowSum += sliceGroupSizeAccu; >>> + m_sliceBaseRow[++sidx] = i; >>> + } 
>>> + } >>> + X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); >>> + m_sliceBaseRow[0] = 0; >>> + m_sliceBaseRow[m_param->maxSlices] = m_numRows; >>> + >>> + m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); >>> + ok &= !!m_sliceMaxBlockRow; >>> + uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16; >>> + sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices; >>> + rowSum = sliceGroupSizeAccu; >>> + sidx = 0; >>> + for (uint32_t i = 0; i < maxBlockRows; i++) >>> + { >>> + const uint32_t rowRange = (rowSum >> 8); >>> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) >>> + { >>> + rowSum += sliceGroupSizeAccu; >>> + m_sliceMaxBlockRow[++sidx] = i; >>> + } >>> + } >>> + m_sliceMaxBlockRow[0] = 0; >>> + m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows; >>> >>> /* determine full motion search range */ >>> int range = m_param->searchRange; /* fpel search */ >>> @@ -341,6 +376,8 @@ >>> m_completionCount = 0; >>> m_bAllRowsStop = false; >>> m_vbvResetTriggerRow = -1; >>> + m_rowSliceTotalBits[0] = 0; >>> + m_rowSliceTotalBits[1] = 0; >>> >>> m_SSDY = m_SSDU = m_SSDV = 0; >>> m_ssim = 0; >>> @@ -550,28 +587,13 @@ >>> >>> /* reset entropy coders and compute slice id */ >>> m_entropyCoder.load(m_initSliceContext); >>> - const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - >>> 1) / m_param->maxSlices; >>> - const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / >>> m_param->maxSlices; >>> - m_sliceGroupSize = (uint16_t)sliceGroupSize; >>> + >>> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) >>> + for (uint32_t row = m_sliceBaseRow[sliceId]; row < >>> m_sliceBaseRow[sliceId + 1]; row++) >>> + m_rows[row].init(m_initSliceContext, sliceId); >>> >>> - uint32_t rowSum = sliceGroupSizeAccu; >>> - uint32_t sidx = 0; >>> - for (uint32_t i = 0; i < m_numRows; i++) >>> - { >>> - const uint32_t rowRange = (rowSum >> 8); >>> - >>> - if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) 
>>> - { >>> - rowSum += sliceGroupSizeAccu; >>> - m_sliceBaseRow[++sidx] = i; >>> - } >>> - >>> - m_rows[i].init(m_initSliceContext, sidx); >>> - } >>> - X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); >>> - >>> - m_sliceBaseRow[0] = 0; >>> - m_sliceBaseRow[m_param->maxSlices] = m_numRows; >>> + // reset slice counter for rate control update >>> + m_sliceCnt = 0; >>> >>> uint32_t numSubstreams = m_param->bEnableWavefront ? >>> slice->m_sps->numCuInHeight : m_param->maxSlices; >>> X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), >>> "Multiple slices without WPP unsupport now!"); >>> @@ -586,8 +608,10 @@ >>> m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]); >>> } >>> else >>> + { >>> for (uint32_t i = 0; i < numSubstreams; i++) >>> m_outStreams[i].resetBits(); >>> + } >>> >>> int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0; >>> >>> @@ -697,10 +721,9 @@ >>> * compressed in a wave-front pattern if WPP is enabled. Row based >>> loop >>> * filters runs behind the CTU compression and reconstruction */ >>> >>> - for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) >>> - { >>> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) >>> m_rows[m_sliceBaseRow[sliceId]].active = true; >>> - } >>> + >>> if (m_param->bEnableWavefront) >>> { >>> int i = 0; >>> @@ -982,9 +1005,8 @@ >>> // complete the slice header by writing WPP row-starts >>> m_entropyCoder.setBitstream(&m_bs); >>> if (slice->m_pps->bEntropyCodingSyncEnabled) >>> - { >>> m_entropyCoder.codeSliceHeade >>> rWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow - >>> prevSliceRow - 1), maxStreamSize); >>> - } >>> + >>> m_bs.writeByteAlignment(); >>> >>> m_nalList.serialize(slice->m_nalUnitType, m_bs); >>> @@ -1270,20 +1292,17 @@ >>> const uint32_t lineStartCUAddr = row * numCols; >>> bool bIsVbv = m_param->rc.vbvBufferSize > 0 && >>> m_param->rc.vbvMaxBitrate > 0; >>> >>> + const uint32_t sliceId = curRow.sliceId; >>> uint32_t 
maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - >>> 1)) / 16; >>> - uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - >>> 1)) / 16; >>> uint32_t noOfBlocks = m_param->maxCUSize / 16; >>> const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - >>> 1].sliceId != curRow.sliceId)) ? 1 : 0; >>> const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || >>> (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0; >>> - const uint32_t sliceId = curRow.sliceId; >>> const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1]; >>> const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; >>> >>> - if (bFirstRowInSlice && !curRow.completed) >>> - { >>> - // Load SBAC coder context from previous row and initialize row >>> state. >>> - rowCoder.load(m_initSliceContext); >>> - } >>> + // Load SBAC coder context from previous row and initialize row >>> state. >>> + if (bFirstRowInSlice && !curRow.completed) >>> + rowCoder.load(m_initSliceContext); >>> >>> // calculate mean QP for consistent deltaQP signalling calculation >>> if (m_param->bOptCUDeltaQP) >>> @@ -1294,15 +1313,12 @@ >>> if (m_param->bEnableWavefront || !row) >>> { >>> double meanQPOff = 0; >>> - uint32_t loopIncr, count = 0; >>> bool isReferenced = IS_REFERENCED(m_frame); >>> double *qpoffs = (isReferenced && m_param->rc.cuTree) ? >>> m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset; >>> if (qpoffs) >>> { >>> - if (m_param->rc.qgSize == 8) >>> - loopIncr = 8; >>> - else >>> - loopIncr = 16; >>> + uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 
8 : >>> 16; >>> + >>> uint32_t cuYStart = 0, height = >>> m_frame->m_fencPic->m_picHeight; >>> if (m_param->bEnableWavefront) >>> { >>> @@ -1312,6 +1328,7 @@ >>> >>> uint32_t qgSize = m_param->rc.qgSize, width = >>> m_frame->m_fencPic->m_picWidth; >>> uint32_t maxOffsetCols = >>> (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr; >>> + uint32_t count = 0; >>> for (uint32_t cuY = cuYStart; cuY < height && (cuY >>> < m_frame->m_fencPic->m_picHeight); cuY += qgSize) >>> { >>> for (uint32_t cuX = 0; cuX < width; cuX += >>> qgSize) >>> @@ -1372,16 +1389,16 @@ >>> curRow.bufferedEntropy.copyState(rowCoder); >>> curRow.bufferedEntropy.loadContexts(rowCoder); >>> } >>> - if (!row && m_vbvResetTriggerRow != intRow) >>> + if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow) >>> { >>> curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc; >>> curEncData.m_rowStat[row].rowQpScale = >>> x265_qp2qScale(curEncData.m_avgQpRc); >>> } >>> >>> FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; >>> - if (m_param->bEnableWavefront && row >= col && row && >>> m_vbvResetTriggerRow != intRow) >>> + if (m_param->bEnableWavefront && rowInSlice >= col && >>> !bFirstRowInSlice && m_vbvResetTriggerRow != intRow) >>> cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + >>> 1].baseQp; >>> - else if (!m_param->bEnableWavefront && row && >>> m_vbvResetTriggerRow != intRow) >>> + else if (!m_param->bEnableWavefront && !bFirstRowInSlice && >>> m_vbvResetTriggerRow != intRow) >>> cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp; >>> else >>> cuStat.baseQp = curEncData.m_rowStat[row].rowQp; >>> @@ -1393,7 +1410,8 @@ >>> { >>> cuStat.vbvCost = 0; >>> cuStat.intraVbvCost = 0; >>> - for (uint32_t h = 0; h < noOfBlocks && block_y < >>> maxBlockRows; h++, block_y++) >>> + >>> + for (uint32_t h = 0; h < noOfBlocks && block_y < >>> m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++) >>> { >>> uint32_t idx = block_x + (block_y * maxBlockCols); >>> >>> @@ -1497,10 +1515,8 @@ >>> int 
shift = 2 * (m_param->maxCUDepth - depth); >>> int cuSize = m_param->maxCUSize >> depth; >>> >>> - if (cuSize == 8) >>> - curRow.rowStats.intra8x8Cnt += >>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN); >>> - else >>> - curRow.rowStats.intra8x8Cnt += >>> (int)(frameLog.cntIntra[depth] << shift); >>> + curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? >>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) : >>> + >>> (int)(frameLog.cntIntra[depth] << shift); >>> >>> curRow.rowStats.inter8x8Cnt += >>> (int)(frameLog.cntInter[depth] << shift); >>> curRow.rowStats.skip8x8Cnt += >>> (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << >>> shift); >>> @@ -1530,12 +1546,13 @@ >>> if (bIsVbv) >>> { >>> // Update encoded bits, satdCost, baseQP for each CU if >>> tune grain is disabled >>> - if ((m_param->bEnableWavefront && (!cuAddr || >>> !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront) >>> + FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; >>> + if ((m_param->bEnableWavefront && ((cuAddr == >>> m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || >>> !m_param->bEnableWavefront) >>> { >>> - curEncData.m_rowStat[row].rowSatd += >>> curEncData.m_cuStat[cuAddr].vbvCost; >>> - curEncData.m_rowStat[row].rowIntraSatd += >>> curEncData.m_cuStat[cuAddr].intraVbvCost; >>> - curEncData.m_rowStat[row].encodedBits += >>> curEncData.m_cuStat[cuAddr].totalBits; >>> - curEncData.m_rowStat[row].sumQpRc += >>> curEncData.m_cuStat[cuAddr].baseQp; >>> + curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost; >>> + curEncData.m_rowStat[row].rowIntraSatd += >>> cuStat.intraVbvCost; >>> + curEncData.m_rowStat[row].encodedBits += >>> cuStat.totalBits; >>> + curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp; >>> curEncData.m_rowStat[row].numEncodedCUs = cuAddr; >>> } >>> >>> @@ -1543,7 +1560,7 @@ >>> if (!m_param->bEnableWavefront && col == numCols - 1) >>> { >>> double qpBase = curEncData.m_cuStat[cuAddr].baseQp; >>> - int reEncode 
= >>> m_top->m_rateControl->rowVbvRateControl(m_frame, >>> row, &m_rce, qpBase); >>> + int reEncode = >>> m_top->m_rateControl->rowVbvRateControl(m_frame, >>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId); >>> qpBase = x265_clip3((double)m_param->rc.qpMin, >>> (double)m_param->rc.qpMax, qpBase); >>> curEncData.m_rowStat[row].rowQp = qpBase; >>> curEncData.m_rowStat[row].rowQpScale = >>> x265_qp2qScale(qpBase); >>> @@ -1569,15 +1586,16 @@ >>> } >>> } >>> // If current block is at row diagonal checkpoint, call vbv >>> ratecontrol. >>> - else if (m_param->bEnableWavefront && row == col && row) >>> + else if (m_param->bEnableWavefront && rowInSlice == col && >>> !bFirstRowInSlice) >>> { >>> if (m_param->rc.bEnableConstVbv) >>> { >>> - int32_t startCuAddr = numCols * row; >>> - int32_t EndCuAddr = startCuAddr + col; >>> - for (int32_t r = row; r >= 0; r--) >>> + uint32_t startCuAddr = numCols * row; >>> + uint32_t EndCuAddr = startCuAddr + col; >>> + >>> + for (int32_t r = row; r >= >>> (int32_t)m_sliceBaseRow[sliceId]; r--) >>> { >>> - for (int32_t c = startCuAddr; c <= EndCuAddr && >>> c <= (int32_t)numCols * (r + 1) - 1; c++) >>> + for (uint32_t c = startCuAddr; c <= EndCuAddr >>> && c <= numCols * (r + 1) - 1; c++) >>> { >>> curEncData.m_rowStat[r].rowSatd += >>> curEncData.m_cuStat[c].vbvCost; >>> curEncData.m_rowStat[r].rowIntraSatd += >>> curEncData.m_cuStat[c].intraVbvCost; >>> @@ -1590,10 +1608,10 @@ >>> } >>> } >>> double qpBase = curEncData.m_cuStat[cuAddr].baseQp; >>> - int reEncode = >>> m_top->m_rateControl->rowVbvRateControl(m_frame, >>> row, &m_rce, qpBase); >>> + int reEncode = >>> m_top->m_rateControl->rowVbvRateControl(m_frame, >>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId); >>> qpBase = x265_clip3((double)m_param->rc.qpMin, >>> (double)m_param->rc.qpMax, qpBase); >>> curEncData.m_rowStat[row].rowQp = qpBase; >>> - curEncData.m_rowStat[row].rowQpScale = >>> x265_qp2qScale(qpBase); >>> + curEncData.m_rowStat[row].rowQpScale = >>> 
x265_qp2qScale(qpBase); >>> >>> if (reEncode < 0) >>> { >>> @@ -1604,7 +1622,7 @@ >>> m_vbvResetTriggerRow = row; >>> m_bAllRowsStop = true; >>> >>> - for (uint32_t r = m_numRows - 1; r >= row; r--) >>> + for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; >>> r >= row; r--) >>> { >>> CTURow& stopRow = m_rows[r]; >>> >>> @@ -1686,11 +1704,11 @@ >>> /* this row of CTUs has been compressed */ >>> if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv) >>> { >>> - if (row == m_numRows - 1) >>> + if (bLastRowInSlice) >>> { >>> - for (int32_t r = 0; r < (int32_t)m_numRows; r++) >>> + for (uint32_t r = m_sliceBaseRow[sliceId]; r < >>> m_sliceBaseRow[sliceId + 1]; r++) >>> { >>> - for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs >>> + 1; c < (int32_t)numCols * (r + 1); c++) >>> + for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs >>> + 1; c < numCols * (r + 1); c++) >>> { >>> curEncData.m_rowStat[r].rowSatd += >>> curEncData.m_cuStat[c].vbvCost; >>> curEncData.m_rowStat[r].rowIntraSatd += >>> curEncData.m_cuStat[c].intraVbvCost; >>> @@ -1708,26 +1726,41 @@ >>> * after half the frame is encoded, but after this initial period >>> we update >>> * after refLagRows (the number of rows reference frames must have >>> completed >>> * before referencees may begin encoding) */ >>> - uint32_t rowCount = 0; >>> if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv) >>> { >>> + uint32_t rowCount = 0; >>> + uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - >>> m_sliceBaseRow[sliceId]; >>> if (!m_rce.encodeOrder) >>> - rowCount = m_numRows - 1; >>> + rowCount = maxRows - 1; >>> else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / >>> m_param->fpsDenom)) >>> - rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1); >>> + rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1); >>> else >>> - rowCount = X265_MIN(m_refLagRows, m_numRows - 1); >>> - if (row == rowCount) >>> + rowCount = X265_MIN(m_refLagRows, maxRows - 1); >>> + >>> + if (rowInSlice == rowCount / 
m_param->maxSlices) >>> { >>> - m_rce.rowTotalBits = 0; >>> + m_rowSliceTotalBits[sliceId] = 0; >>> if (bIsVbv) >>> - for (uint32_t i = 0; i < rowCount; i++) >>> - m_rce.rowTotalBits += curEncData.m_rowStat[i].encode >>> dBits; >>> + { >>> + for (uint32_t i = m_sliceBaseRow[sliceId]; i < >>> (rowCount / m_param->maxSlices) + m_sliceBaseRow[sliceId]; i++) >>> + m_rowSliceTotalBits[sliceId] += >>> curEncData.m_rowStat[i].encodedBits; >>> + } >>> else >>> - for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols; >>> cuAddr++) >>> - m_rce.rowTotalBits += curEncData.m_cuStat[cuAddr].to >>> talBits; >>> + { >>> + uint32_t startAddr = rowCount * numCols * sliceId; >>> + uint32_t finishAddr = startAddr + rowCount * numCols; >>> + >>> + for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; >>> cuAddr++) >>> + m_rowSliceTotalBits[sliceId] += >>> curEncData.m_cuStat[cuAddr].totalBits; >>> + } >>> >>> - m_top->m_rateControl->rateControlUpdateStats(&m_rce); >>> + if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices) >>> + { >>> + m_rce.rowTotalBits = 0; >>> + for (uint32_t i = 0; i < m_param->maxSlices; i++) >>> + m_rce.rowTotalBits += m_rowSliceTotalBits[i]; >>> + m_top->m_rateControl->rateControlUpdateStats(&m_rce); >>> + } >>> } >>> } >>> >>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.h >>> --- a/source/encoder/frameencoder.h Tue Sep 12 18:13:03 2017 +0530 >>> +++ b/source/encoder/frameencoder.h Thu Sep 21 20:20:52 2017 +0530 >>> @@ -138,6 +138,7 @@ >>> volatile bool m_bAllRowsStop; >>> volatile int m_completionCount; >>> volatile int m_vbvResetTriggerRow; >>> + volatile int m_sliceCnt; >>> >>> uint32_t m_numRows; >>> uint32_t m_numCols; >>> @@ -147,8 +148,10 @@ >>> >>> CTURow* m_rows; >>> uint16_t m_sliceAddrBits; >>> - uint16_t m_sliceGroupSize; >>> - uint32_t* m_sliceBaseRow; >>> + uint32_t m_sliceGroupSize; >>> + uint32_t* m_sliceBaseRow; >>> + uint32_t* m_sliceMaxBlockRow; >>> + int64_t m_rowSliceTotalBits[2]; >>> RateControlEntry m_rce; >>> 
SEIDecodedPictureHash m_seiReconPictureDigest; >>> >>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.cpp >>> --- a/source/encoder/ratecontrol.cpp Tue Sep 12 18:13:03 2017 +0530 >>> +++ b/source/encoder/ratecontrol.cpp Thu Sep 21 20:20:52 2017 +0530 >>> @@ -732,7 +732,6 @@ >>> m_bitrate = m_param->rc.bitrate * 1000; >>> } >>> >>> - >>> void RateControl::initHRD(SPS& sps) >>> { >>> int vbvBufferSize = m_param->rc.vbvBufferSize * 1000; >>> @@ -765,6 +764,7 @@ >>> >>> #undef MAX_DURATION >>> } >>> + >>> bool RateControl::analyseABR2Pass(uint64_t allAvailableBits) >>> { >>> double rateFactor, stepMult; >>> @@ -1473,6 +1473,7 @@ >>> >>> return q; >>> } >>> + >>> double RateControl::countExpectedBits(int startPos, int endPos) >>> { >>> double expectedBits = 0; >>> @@ -1484,6 +1485,7 @@ >>> } >>> return expectedBits; >>> } >>> + >>> bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int >>> over, int endPos) >>> { >>> /* find an interval ending on an overflow or underflow (depending >>> on whether >>> @@ -1531,6 +1533,7 @@ >>> } >>> return adjusted; >>> } >>> + >>> bool RateControl::cuTreeReadFor2Pass(Frame* frame) >>> { >>> int index = m_encOrder[frame->m_poc]; >>> @@ -1579,24 +1582,24 @@ >>> double RateControl::tuneAbrQScaleFromFeedback(double qScale) >>> { >>> double abrBuffer = 2 * m_rateTolerance * m_bitrate; >>> - /* use framesDone instead of POC as poc count is not serial >>> with bframes enabled */ >>> - double overflow = 1.0; >>> - double timeDone = (double)(m_framesDone - >>> m_param->frameNumThreads + 1) * m_frameDuration; >>> - double wantedBits = timeDone * m_bitrate; >>> - int64_t encodedBits = m_totalBits; >>> - if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) >>> - { >>> - abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); >>> - encodedBits = m_encodedBits; >>> - } >>> + /* use framesDone instead of POC as poc count is not serial with >>> bframes enabled */ >>> + double overflow = 1.0; >>> + double timeDone 
= (double)(m_framesDone - m_param->frameNumThreads >>> + 1) * m_frameDuration; >>> + double wantedBits = timeDone * m_bitrate; >>> + int64_t encodedBits = m_totalBits; >>> + if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) >>> + { >>> + abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); >>> + encodedBits = m_encodedBits; >>> + } >>> >>> - if (wantedBits > 0 && encodedBits > 0 && >>> (!m_partialResidualFrames || >>> - m_param->rc.bStrictCbr || m_isGrainEnabled)) >>> - { >>> - abrBuffer *= X265_MAX(1, sqrt(timeDone)); >>> - overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - >>> wantedBits) / abrBuffer); >>> - qScale *= overflow; >>> - } >>> + if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames >>> || >>> + m_param->rc.bStrictCbr || m_isGrainEnabled)) >>> + { >>> + abrBuffer *= X265_MAX(1, sqrt(timeDone)); >>> + overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) >>> / abrBuffer); >>> + qScale *= overflow; >>> + } >>> return qScale; >>> } >>> >>> @@ -2330,17 +2333,18 @@ >>> return totalSatdBits + encodedBitsSoFar; >>> } >>> >>> -int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, >>> RateControlEntry* rce, double& qpVbv) >>> +int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, >>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t >>> sliceId) >>> { >>> FrameData& curEncData = *curFrame->m_encData; >>> double qScaleVbv = x265_qp2qScale(qpVbv); >>> uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd; >>> double encodedBits = curEncData.m_rowStat[row].encodedBits; >>> + uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; >>> >>> - if (m_param->bEnableWavefront && row == 1) >>> + if (m_param->bEnableWavefront && rowInSlice == 1) >>> { >>> - rowSatdCost += curEncData.m_rowStat[0].rowSatd; >>> - encodedBits += curEncData.m_rowStat[0].encodedBits; >>> + rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd; >>> + encodedBits += curEncData.m_rowStat[row - 1].encodedBits; >> 
>> } >>> rowSatdCost >>= X265_DEPTH - 8; >>> updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost, >>> encodedBits); >>> @@ -2350,8 +2354,8 @@ >>> if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp) >>> { >>> uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowI >>> ntraSatd; >>> - if (m_param->bEnableWavefront && row == 1) >>> - intraRowSatdCost += curEncData.m_rowStat[0].rowInt >>> raSatd; >>> + if (m_param->bEnableWavefront && rowInSlice == 1) >>> + intraRowSatdCost += curEncData.m_rowStat[row - >>> 1].rowIntraSatd; >>> intraRowSatdCost >>= X265_DEPTH - 8; >>> updatePredictor(rce->rowPred[1], qScaleVbv, >>> (double)intraRowSatdCost, encodedBits); >>> } >>> @@ -2376,7 +2380,7 @@ >>> const SPS& sps = *curEncData.m_slice->m_sps; >>> double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight); >>> >>> - if (row < sps.numCuInHeight - 1) >>> + if (row < m_sliceBaseRow[sliceId + 1] - 1) >>> { >>> /* More threads means we have to be more cautious in letting >>> ratecontrol use up extra bits. */ >>> double rcTol = bufferLeftPlanned / m_param->frameNumThreads * >>> m_rateTolerance; >>> @@ -2693,8 +2697,8 @@ >>> m_encodedBitsWindow[pos % s_slidingWindowFrames] = >>> actualBits; >>> if(rce->sliceType != I_SLICE) >>> { >>> - int qp = int (rce->qpaRc + 0.5); >>> - m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? >>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5; >>> + int qp = int (rce->qpaRc + 0.5); >>> + m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? 
>>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5; >>> } >>> curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow; >>> curFrame->m_rcData->cplxrSum = m_cplxrSum; >>> @@ -2779,7 +2783,8 @@ >>> curFrame->m_encData->m_frameStats.percent8x8Skip * m_ncu) >>> < 0) >>> goto writeFailure; >>> } >>> - else{ >>> + else >>> + { >>> RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps; >>> int i, num = rpsWriter->numberOfPictures; >>> char deltaPOC[128]; >>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.h >>> --- a/source/encoder/ratecontrol.h Tue Sep 12 18:13:03 2017 +0530 >>> +++ b/source/encoder/ratecontrol.h Thu Sep 21 20:20:52 2017 +0530 >>> @@ -244,7 +244,7 @@ >>> int rateControlStart(Frame* curFrame, RateControlEntry* rce, >>> Encoder* enc); >>> void rateControlUpdateStats(RateControlEntry* rce); >>> int rateControlEnd(Frame* curFrame, int64_t bits, >>> RateControlEntry* rce, int *filler); >>> - int rowVbvRateControl(Frame* curFrame, uint32_t row, >>> RateControlEntry* rce, double& qpVbv); >>> + int rowVbvRateControl(Frame* curFrame, uint32_t row, >>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t >>> sliceId); >>> int rateControlSliceType(int frameNum); >>> bool cuTreeReadFor2Pass(Frame* curFrame); >>> void hrdFullness(SEIBufferingPeriod* sei); >>> >> >> >> _______________________________________________ >> x265-devel mailing list >> [email protected] >> https://mailman.videolan.org/listinfo/x265-devel >> >> > > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
