Please find the attached patch.

On Thu, Sep 21, 2017 at 8:21 PM, <[email protected]> wrote:
> # HG changeset patch > # User Ashok Kumar Mishra <[email protected]> > # Date 1506005452 -19800 > # Thu Sep 21 20:20:52 2017 +0530 > # Node ID 546387e0b983ac1d68cda73777b34a122928cd32 > # Parent 71f700844b0b2a9120bfd8a2d1f13e219aa20677 > vbv hanging issue; fix for multiple slices > When multiple slices are enabled, vbv rate control must take care of > correct rows in slices, since multiple slices are encoding simultaneously. > > diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.cpp > --- a/source/encoder/frameencoder.cpp Tue Sep 12 18:13:03 2017 +0530 > +++ b/source/encoder/frameencoder.cpp Thu Sep 21 20:20:52 2017 +0530 > @@ -88,6 +88,7 @@ > delete[] m_outStreams; > delete[] m_backupStreams; > X265_FREE(m_sliceBaseRow); > + X265_FREE(m_sliceMaxBlockRow); > X265_FREE(m_cuGeoms); > X265_FREE(m_ctuGeomMap); > X265_FREE(m_substreamSizes); > @@ -118,6 +119,40 @@ > > m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); > ok &= !!m_sliceBaseRow; > + m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / > m_param->maxSlices; > + uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices; > + uint32_t rowSum = sliceGroupSizeAccu; > + uint32_t sidx = 0; > + for (uint32_t i = 0; i < m_numRows; i++) > + { > + const uint32_t rowRange = (rowSum >> 8); > + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) > + { > + rowSum += sliceGroupSizeAccu; > + m_sliceBaseRow[++sidx] = i; > + } > + } > + X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); > + m_sliceBaseRow[0] = 0; > + m_sliceBaseRow[m_param->maxSlices] = m_numRows; > + > + m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); > + ok &= !!m_sliceMaxBlockRow; > + uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16; > + sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices; > + rowSum = sliceGroupSizeAccu; > + sidx = 0; > + for (uint32_t i = 0; i < maxBlockRows; i++) > + { > + const uint32_t rowRange = (rowSum >> 8); > + if 
((i >= rowRange) & (sidx != m_param->maxSlices - 1)) > + { > + rowSum += sliceGroupSizeAccu; > + m_sliceMaxBlockRow[++sidx] = i; > + } > + } > + m_sliceMaxBlockRow[0] = 0; > + m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows; > > /* determine full motion search range */ > int range = m_param->searchRange; /* fpel search */ > @@ -341,6 +376,8 @@ > m_completionCount = 0; > m_bAllRowsStop = false; > m_vbvResetTriggerRow = -1; > + m_rowSliceTotalBits[0] = 0; > + m_rowSliceTotalBits[1] = 0; > > m_SSDY = m_SSDU = m_SSDV = 0; > m_ssim = 0; > @@ -550,28 +587,13 @@ > > /* reset entropy coders and compute slice id */ > m_entropyCoder.load(m_initSliceContext); > - const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1) > / m_param->maxSlices; > - const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / > m_param->maxSlices; > - m_sliceGroupSize = (uint16_t)sliceGroupSize; > + > + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) > + for (uint32_t row = m_sliceBaseRow[sliceId]; row < > m_sliceBaseRow[sliceId + 1]; row++) > + m_rows[row].init(m_initSliceContext, sliceId); > > - uint32_t rowSum = sliceGroupSizeAccu; > - uint32_t sidx = 0; > - for (uint32_t i = 0; i < m_numRows; i++) > - { > - const uint32_t rowRange = (rowSum >> 8); > - > - if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) > - { > - rowSum += sliceGroupSizeAccu; > - m_sliceBaseRow[++sidx] = i; > - } > - > - m_rows[i].init(m_initSliceContext, sidx); > - } > - X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); > - > - m_sliceBaseRow[0] = 0; > - m_sliceBaseRow[m_param->maxSlices] = m_numRows; > + // reset slice counter for rate control update > + m_sliceCnt = 0; > > uint32_t numSubstreams = m_param->bEnableWavefront ? 
> slice->m_sps->numCuInHeight : m_param->maxSlices; > X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), > "Multiple slices without WPP unsupport now!"); > @@ -586,8 +608,10 @@ > m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]); > } > else > + { > for (uint32_t i = 0; i < numSubstreams; i++) > m_outStreams[i].resetBits(); > + } > > int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0; > > @@ -697,10 +721,9 @@ > * compressed in a wave-front pattern if WPP is enabled. Row based > loop > * filters runs behind the CTU compression and reconstruction */ > > - for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) > - { > + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) > m_rows[m_sliceBaseRow[sliceId]].active = true; > - } > + > if (m_param->bEnableWavefront) > { > int i = 0; > @@ -982,9 +1005,8 @@ > // complete the slice header by writing WPP row-starts > m_entropyCoder.setBitstream(&m_bs); > if (slice->m_pps->bEntropyCodingSyncEnabled) > - { > m_entropyCoder.codeSliceHeaderWPPEntryPoints( > &m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow - 1), > maxStreamSize); > - } > + > m_bs.writeByteAlignment(); > > m_nalList.serialize(slice->m_nalUnitType, m_bs); > @@ -1270,20 +1292,17 @@ > const uint32_t lineStartCUAddr = row * numCols; > bool bIsVbv = m_param->rc.vbvBufferSize > 0 && > m_param->rc.vbvMaxBitrate > 0; > > + const uint32_t sliceId = curRow.sliceId; > uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) > / 16; > - uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1)) > / 16; > uint32_t noOfBlocks = m_param->maxCUSize / 16; > const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - > 1].sliceId != curRow.sliceId)) ? 1 : 0; > const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || > (m_rows[row + 1].sliceId != curRow.sliceId)) ? 
1 : 0; > - const uint32_t sliceId = curRow.sliceId; > const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1]; > const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; > > - if (bFirstRowInSlice && !curRow.completed) > - { > - // Load SBAC coder context from previous row and initialize row > state. > - rowCoder.load(m_initSliceContext); > - } > + // Load SBAC coder context from previous row and initialize row state. > + if (bFirstRowInSlice && !curRow.completed) > + rowCoder.load(m_initSliceContext); > > // calculate mean QP for consistent deltaQP signalling calculation > if (m_param->bOptCUDeltaQP) > @@ -1294,15 +1313,12 @@ > if (m_param->bEnableWavefront || !row) > { > double meanQPOff = 0; > - uint32_t loopIncr, count = 0; > bool isReferenced = IS_REFERENCED(m_frame); > double *qpoffs = (isReferenced && m_param->rc.cuTree) ? > m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset; > if (qpoffs) > { > - if (m_param->rc.qgSize == 8) > - loopIncr = 8; > - else > - loopIncr = 16; > + uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 
8 : > 16; > + > uint32_t cuYStart = 0, height = m_frame->m_fencPic->m_ > picHeight; > if (m_param->bEnableWavefront) > { > @@ -1312,6 +1328,7 @@ > > uint32_t qgSize = m_param->rc.qgSize, width = > m_frame->m_fencPic->m_picWidth; > uint32_t maxOffsetCols = (m_frame->m_fencPic->m_picWidth > + (loopIncr - 1)) / loopIncr; > + uint32_t count = 0; > for (uint32_t cuY = cuYStart; cuY < height && (cuY < > m_frame->m_fencPic->m_picHeight); cuY += qgSize) > { > for (uint32_t cuX = 0; cuX < width; cuX += qgSize) > @@ -1372,16 +1389,16 @@ > curRow.bufferedEntropy.copyState(rowCoder); > curRow.bufferedEntropy.loadContexts(rowCoder); > } > - if (!row && m_vbvResetTriggerRow != intRow) > + if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow) > { > curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc; > curEncData.m_rowStat[row].rowQpScale = > x265_qp2qScale(curEncData.m_avgQpRc); > } > > FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; > - if (m_param->bEnableWavefront && row >= col && row && > m_vbvResetTriggerRow != intRow) > + if (m_param->bEnableWavefront && rowInSlice >= col && > !bFirstRowInSlice && m_vbvResetTriggerRow != intRow) > cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + > 1].baseQp; > - else if (!m_param->bEnableWavefront && row && > m_vbvResetTriggerRow != intRow) > + else if (!m_param->bEnableWavefront && !bFirstRowInSlice && > m_vbvResetTriggerRow != intRow) > cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp; > else > cuStat.baseQp = curEncData.m_rowStat[row].rowQp; > @@ -1393,7 +1410,8 @@ > { > cuStat.vbvCost = 0; > cuStat.intraVbvCost = 0; > - for (uint32_t h = 0; h < noOfBlocks && block_y < > maxBlockRows; h++, block_y++) > + > + for (uint32_t h = 0; h < noOfBlocks && block_y < > m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++) > { > uint32_t idx = block_x + (block_y * maxBlockCols); > > @@ -1497,10 +1515,8 @@ > int shift = 2 * (m_param->maxCUDepth - depth); > int cuSize = m_param->maxCUSize >> depth; > > - if (cuSize == 8) > - 
curRow.rowStats.intra8x8Cnt += > (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN); > - else > - curRow.rowStats.intra8x8Cnt += > (int)(frameLog.cntIntra[depth] << shift); > + curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? > (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) : > + > (int)(frameLog.cntIntra[depth] << shift); > > curRow.rowStats.inter8x8Cnt += > (int)(frameLog.cntInter[depth] << shift); > curRow.rowStats.skip8x8Cnt += > (int)((frameLog.cntSkipCu[depth] > + frameLog.cntMergeCu[depth]) << shift); > @@ -1530,12 +1546,13 @@ > if (bIsVbv) > { > // Update encoded bits, satdCost, baseQP for each CU if tune > grain is disabled > - if ((m_param->bEnableWavefront && (!cuAddr || > !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront) > + FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; > + if ((m_param->bEnableWavefront && ((cuAddr == > m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || > !m_param->bEnableWavefront) > { > - curEncData.m_rowStat[row].rowSatd += > curEncData.m_cuStat[cuAddr].vbvCost; > - curEncData.m_rowStat[row].rowIntraSatd += > curEncData.m_cuStat[cuAddr].intraVbvCost; > - curEncData.m_rowStat[row].encodedBits += > curEncData.m_cuStat[cuAddr].totalBits; > - curEncData.m_rowStat[row].sumQpRc += > curEncData.m_cuStat[cuAddr].baseQp; > + curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost; > + curEncData.m_rowStat[row].rowIntraSatd += > cuStat.intraVbvCost; > + curEncData.m_rowStat[row].encodedBits += > cuStat.totalBits; > + curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp; > curEncData.m_rowStat[row].numEncodedCUs = cuAddr; > } > > @@ -1543,7 +1560,7 @@ > if (!m_param->bEnableWavefront && col == numCols - 1) > { > double qpBase = curEncData.m_cuStat[cuAddr].baseQp; > - int reEncode = > m_top->m_rateControl->rowVbvRateControl(m_frame, > row, &m_rce, qpBase); > + int reEncode = > m_top->m_rateControl->rowVbvRateControl(m_frame, > row, &m_rce, qpBase, m_sliceBaseRow, sliceId); > qpBase = 
x265_clip3((double)m_param->rc.qpMin, > (double)m_param->rc.qpMax, qpBase); > curEncData.m_rowStat[row].rowQp = qpBase; > curEncData.m_rowStat[row].rowQpScale = > x265_qp2qScale(qpBase); > @@ -1569,15 +1586,16 @@ > } > } > // If current block is at row diagonal checkpoint, call vbv > ratecontrol. > - else if (m_param->bEnableWavefront && row == col && row) > + else if (m_param->bEnableWavefront && rowInSlice == col && > !bFirstRowInSlice) > { > if (m_param->rc.bEnableConstVbv) > { > - int32_t startCuAddr = numCols * row; > - int32_t EndCuAddr = startCuAddr + col; > - for (int32_t r = row; r >= 0; r--) > + uint32_t startCuAddr = numCols * row; > + uint32_t EndCuAddr = startCuAddr + col; > + > + for (int32_t r = row; r >= > (int32_t)m_sliceBaseRow[sliceId]; > r--) > { > - for (int32_t c = startCuAddr; c <= EndCuAddr && c > <= (int32_t)numCols * (r + 1) - 1; c++) > + for (uint32_t c = startCuAddr; c <= EndCuAddr && > c <= numCols * (r + 1) - 1; c++) > { > curEncData.m_rowStat[r].rowSatd += > curEncData.m_cuStat[c].vbvCost; > curEncData.m_rowStat[r].rowIntraSatd += > curEncData.m_cuStat[c].intraVbvCost; > @@ -1590,10 +1608,10 @@ > } > } > double qpBase = curEncData.m_cuStat[cuAddr].baseQp; > - int reEncode = > m_top->m_rateControl->rowVbvRateControl(m_frame, > row, &m_rce, qpBase); > + int reEncode = > m_top->m_rateControl->rowVbvRateControl(m_frame, > row, &m_rce, qpBase, m_sliceBaseRow, sliceId); > qpBase = x265_clip3((double)m_param->rc.qpMin, > (double)m_param->rc.qpMax, qpBase); > curEncData.m_rowStat[row].rowQp = qpBase; > - curEncData.m_rowStat[row].rowQpScale = > x265_qp2qScale(qpBase); > + curEncData.m_rowStat[row].rowQpScale = > x265_qp2qScale(qpBase); > > if (reEncode < 0) > { > @@ -1604,7 +1622,7 @@ > m_vbvResetTriggerRow = row; > m_bAllRowsStop = true; > > - for (uint32_t r = m_numRows - 1; r >= row; r--) > + for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r > >= row; r--) > { > CTURow& stopRow = m_rows[r]; > > @@ -1686,11 +1704,11 @@ > /* this row of 
CTUs has been compressed */ > if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv) > { > - if (row == m_numRows - 1) > + if (bLastRowInSlice) > { > - for (int32_t r = 0; r < (int32_t)m_numRows; r++) > + for (uint32_t r = m_sliceBaseRow[sliceId]; r < > m_sliceBaseRow[sliceId + 1]; r++) > { > - for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs + > 1; c < (int32_t)numCols * (r + 1); c++) > + for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs > + 1; c < numCols * (r + 1); c++) > { > curEncData.m_rowStat[r].rowSatd += > curEncData.m_cuStat[c].vbvCost; > curEncData.m_rowStat[r].rowIntraSatd += > curEncData.m_cuStat[c].intraVbvCost; > @@ -1708,26 +1726,41 @@ > * after half the frame is encoded, but after this initial period we > update > * after refLagRows (the number of rows reference frames must have > completed > * before referencees may begin encoding) */ > - uint32_t rowCount = 0; > if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv) > { > + uint32_t rowCount = 0; > + uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - > m_sliceBaseRow[sliceId]; > if (!m_rce.encodeOrder) > - rowCount = m_numRows - 1; > + rowCount = maxRows - 1; > else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / > m_param->fpsDenom)) > - rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1); > + rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1); > else > - rowCount = X265_MIN(m_refLagRows, m_numRows - 1); > - if (row == rowCount) > + rowCount = X265_MIN(m_refLagRows, maxRows - 1); > + > + if (rowInSlice == rowCount / m_param->maxSlices) > { > - m_rce.rowTotalBits = 0; > + m_rowSliceTotalBits[sliceId] = 0; > if (bIsVbv) > - for (uint32_t i = 0; i < rowCount; i++) > - m_rce.rowTotalBits += curEncData.m_rowStat[i]. 
> encodedBits; > + { > + for (uint32_t i = m_sliceBaseRow[sliceId]; i < (rowCount > / m_param->maxSlices) + m_sliceBaseRow[sliceId]; i++) > + m_rowSliceTotalBits[sliceId] += > curEncData.m_rowStat[i].encodedBits; > + } > else > - for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols; > cuAddr++) > - m_rce.rowTotalBits += curEncData.m_cuStat[cuAddr]. > totalBits; > + { > + uint32_t startAddr = rowCount * numCols * sliceId; > + uint32_t finishAddr = startAddr + rowCount * numCols; > + > + for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; > cuAddr++) > + m_rowSliceTotalBits[sliceId] += > curEncData.m_cuStat[cuAddr].totalBits; > + } > > - m_top->m_rateControl->rateControlUpdateStats(&m_rce); > + if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices) > + { > + m_rce.rowTotalBits = 0; > + for (uint32_t i = 0; i < m_param->maxSlices; i++) > + m_rce.rowTotalBits += m_rowSliceTotalBits[i]; > + m_top->m_rateControl->rateControlUpdateStats(&m_rce); > + } > } > } > > diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.h > --- a/source/encoder/frameencoder.h Tue Sep 12 18:13:03 2017 +0530 > +++ b/source/encoder/frameencoder.h Thu Sep 21 20:20:52 2017 +0530 > @@ -138,6 +138,7 @@ > volatile bool m_bAllRowsStop; > volatile int m_completionCount; > volatile int m_vbvResetTriggerRow; > + volatile int m_sliceCnt; > > uint32_t m_numRows; > uint32_t m_numCols; > @@ -147,8 +148,10 @@ > > CTURow* m_rows; > uint16_t m_sliceAddrBits; > - uint16_t m_sliceGroupSize; > - uint32_t* m_sliceBaseRow; > + uint32_t m_sliceGroupSize; > + uint32_t* m_sliceBaseRow; > + uint32_t* m_sliceMaxBlockRow; > + int64_t m_rowSliceTotalBits[2]; > RateControlEntry m_rce; > SEIDecodedPictureHash m_seiReconPictureDigest; > > diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.cpp > --- a/source/encoder/ratecontrol.cpp Tue Sep 12 18:13:03 2017 +0530 > +++ b/source/encoder/ratecontrol.cpp Thu Sep 21 20:20:52 2017 +0530 > @@ -732,7 +732,6 @@ > m_bitrate = m_param->rc.bitrate * 1000; > } 
> > - > void RateControl::initHRD(SPS& sps) > { > int vbvBufferSize = m_param->rc.vbvBufferSize * 1000; > @@ -765,6 +764,7 @@ > > #undef MAX_DURATION > } > + > bool RateControl::analyseABR2Pass(uint64_t allAvailableBits) > { > double rateFactor, stepMult; > @@ -1473,6 +1473,7 @@ > > return q; > } > + > double RateControl::countExpectedBits(int startPos, int endPos) > { > double expectedBits = 0; > @@ -1484,6 +1485,7 @@ > } > return expectedBits; > } > + > bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int > over, int endPos) > { > /* find an interval ending on an overflow or underflow (depending on > whether > @@ -1531,6 +1533,7 @@ > } > return adjusted; > } > + > bool RateControl::cuTreeReadFor2Pass(Frame* frame) > { > int index = m_encOrder[frame->m_poc]; > @@ -1579,24 +1582,24 @@ > double RateControl::tuneAbrQScaleFromFeedback(double qScale) > { > double abrBuffer = 2 * m_rateTolerance * m_bitrate; > - /* use framesDone instead of POC as poc count is not serial with > bframes enabled */ > - double overflow = 1.0; > - double timeDone = (double)(m_framesDone - > m_param->frameNumThreads + 1) * m_frameDuration; > - double wantedBits = timeDone * m_bitrate; > - int64_t encodedBits = m_totalBits; > - if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) > - { > - abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); > - encodedBits = m_encodedBits; > - } > + /* use framesDone instead of POC as poc count is not serial with > bframes enabled */ > + double overflow = 1.0; > + double timeDone = (double)(m_framesDone - m_param->frameNumThreads + > 1) * m_frameDuration; > + double wantedBits = timeDone * m_bitrate; > + int64_t encodedBits = m_totalBits; > + if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) > + { > + abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); > + encodedBits = m_encodedBits; > + } > > - if (wantedBits > 0 && encodedBits > 0 && > (!m_partialResidualFrames || > - m_param->rc.bStrictCbr || 
m_isGrainEnabled)) > - { > - abrBuffer *= X265_MAX(1, sqrt(timeDone)); > - overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - > wantedBits) / abrBuffer); > - qScale *= overflow; > - } > + if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames || > + m_param->rc.bStrictCbr || m_isGrainEnabled)) > + { > + abrBuffer *= X265_MAX(1, sqrt(timeDone)); > + overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) / > abrBuffer); > + qScale *= overflow; > + } > return qScale; > } > > @@ -2330,17 +2333,18 @@ > return totalSatdBits + encodedBitsSoFar; > } > > -int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, > RateControlEntry* rce, double& qpVbv) > +int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, > RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t > sliceId) > { > FrameData& curEncData = *curFrame->m_encData; > double qScaleVbv = x265_qp2qScale(qpVbv); > uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd; > double encodedBits = curEncData.m_rowStat[row].encodedBits; > + uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; > > - if (m_param->bEnableWavefront && row == 1) > + if (m_param->bEnableWavefront && rowInSlice == 1) > { > - rowSatdCost += curEncData.m_rowStat[0].rowSatd; > - encodedBits += curEncData.m_rowStat[0].encodedBits; > + rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd; > + encodedBits += curEncData.m_rowStat[row - 1].encodedBits; > } > rowSatdCost >>= X265_DEPTH - 8; > updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost, > encodedBits); > @@ -2350,8 +2354,8 @@ > if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp) > { > uint64_t intraRowSatdCost = curEncData.m_rowStat[row]. 
> rowIntraSatd; > - if (m_param->bEnableWavefront && row == 1) > - intraRowSatdCost += curEncData.m_rowStat[0].rowIntraSatd; > + if (m_param->bEnableWavefront && rowInSlice == 1) > + intraRowSatdCost += curEncData.m_rowStat[row - > 1].rowIntraSatd; > intraRowSatdCost >>= X265_DEPTH - 8; > updatePredictor(rce->rowPred[1], qScaleVbv, > (double)intraRowSatdCost, encodedBits); > } > @@ -2376,7 +2380,7 @@ > const SPS& sps = *curEncData.m_slice->m_sps; > double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight); > > - if (row < sps.numCuInHeight - 1) > + if (row < m_sliceBaseRow[sliceId + 1] - 1) > { > /* More threads means we have to be more cautious in letting > ratecontrol use up extra bits. */ > double rcTol = bufferLeftPlanned / m_param->frameNumThreads * > m_rateTolerance; > @@ -2693,8 +2697,8 @@ > m_encodedBitsWindow[pos % s_slidingWindowFrames] = actualBits; > if(rce->sliceType != I_SLICE) > { > - int qp = int (rce->qpaRc + 0.5); > - m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? actualBits > : (m_qpToEncodedBits[qp] + actualBits) * 0.5; > + int qp = int (rce->qpaRc + 0.5); > + m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? 
> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5; > } > curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow; > curFrame->m_rcData->cplxrSum = m_cplxrSum; > @@ -2779,7 +2783,8 @@ > curFrame->m_encData->m_frameStats.percent8x8Skip * m_ncu) < > 0) > goto writeFailure; > } > - else{ > + else > + { > RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps; > int i, num = rpsWriter->numberOfPictures; > char deltaPOC[128]; > diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.h > --- a/source/encoder/ratecontrol.h Tue Sep 12 18:13:03 2017 +0530 > +++ b/source/encoder/ratecontrol.h Thu Sep 21 20:20:52 2017 +0530 > @@ -244,7 +244,7 @@ > int rateControlStart(Frame* curFrame, RateControlEntry* rce, > Encoder* enc); > void rateControlUpdateStats(RateControlEntry* rce); > int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* > rce, int *filler); > - int rowVbvRateControl(Frame* curFrame, uint32_t row, > RateControlEntry* rce, double& qpVbv); > + int rowVbvRateControl(Frame* curFrame, uint32_t row, > RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t > sliceId); > int rateControlSliceType(int frameNum); > bool cuTreeReadFor2Pass(Frame* curFrame); > void hrdFullness(SEIBufferingPeriod* sei); >
# HG changeset patch # User Ashok Kumar Mishra <[email protected]> # Date 1506091858 -19800 # Fri Sep 22 20:20:58 2017 +0530 # Node ID 0882827c33cccab9aa8622c443c5bbba86d8b482 # Parent e62b12bd8b4573b15290ebf110e01c8fafce55be vbv hanging issue; fix for multiple slices When multiple slices are enabled, vbv rate control must take care of correct rows in slices, since multiple slices are encoding simultaneously. diff -r e62b12bd8b45 -r 0882827c33cc source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Thu Jun 29 13:13:56 2017 +0530 +++ b/source/encoder/frameencoder.cpp Fri Sep 22 20:20:58 2017 +0530 @@ -88,6 +88,7 @@ delete[] m_outStreams; delete[] m_backupStreams; X265_FREE(m_sliceBaseRow); + X265_FREE(m_sliceMaxBlockRow); X265_FREE(m_cuGeoms); X265_FREE(m_ctuGeomMap); X265_FREE(m_substreamSizes); @@ -118,6 +119,40 @@ m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); ok &= !!m_sliceBaseRow; + m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / m_param->maxSlices; + uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices; + uint32_t rowSum = sliceGroupSizeAccu; + uint32_t sidx = 0; + for (uint32_t i = 0; i < m_numRows; i++) + { + const uint32_t rowRange = (rowSum >> 8); + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) + { + rowSum += sliceGroupSizeAccu; + m_sliceBaseRow[++sidx] = i; + } + } + X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); + m_sliceBaseRow[0] = 0; + m_sliceBaseRow[m_param->maxSlices] = m_numRows; + + m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); + ok &= !!m_sliceMaxBlockRow; + uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16; + sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices; + rowSum = sliceGroupSizeAccu; + sidx = 0; + for (uint32_t i = 0; i < maxBlockRows; i++) + { + const uint32_t rowRange = (rowSum >> 8); + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) + { + rowSum += sliceGroupSizeAccu; + 
m_sliceMaxBlockRow[++sidx] = i; + } + } + m_sliceMaxBlockRow[0] = 0; + m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows; /* determine full motion search range */ int range = m_param->searchRange; /* fpel search */ @@ -341,6 +376,8 @@ m_completionCount = 0; m_bAllRowsStop = false; m_vbvResetTriggerRow = -1; + m_rowSliceTotalBits[0] = 0; + m_rowSliceTotalBits[1] = 0; m_SSDY = m_SSDU = m_SSDV = 0; m_ssim = 0; @@ -550,28 +587,13 @@ /* reset entropy coders and compute slice id */ m_entropyCoder.load(m_initSliceContext); - const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1) / m_param->maxSlices; - const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices; - m_sliceGroupSize = (uint16_t)sliceGroupSize; + + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) + for (uint32_t row = m_sliceBaseRow[sliceId]; row < m_sliceBaseRow[sliceId + 1]; row++) + m_rows[row].init(m_initSliceContext, sliceId); - uint32_t rowSum = sliceGroupSizeAccu; - uint32_t sidx = 0; - for (uint32_t i = 0; i < m_numRows; i++) - { - const uint32_t rowRange = (rowSum >> 8); - - if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) - { - rowSum += sliceGroupSizeAccu; - m_sliceBaseRow[++sidx] = i; - } - - m_rows[i].init(m_initSliceContext, sidx); - } - X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); - - m_sliceBaseRow[0] = 0; - m_sliceBaseRow[m_param->maxSlices] = m_numRows; + // reset slice counter for rate control update + m_sliceCnt = 0; uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : m_param->maxSlices; X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), "Multiple slices without WPP unsupport now!"); @@ -586,8 +608,10 @@ m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]); } else + { for (uint32_t i = 0; i < numSubstreams; i++) m_outStreams[i].resetBits(); + } int prevBPSEI = m_rce.encodeOrder ? 
m_top->m_lastBPSEI : 0; @@ -697,10 +721,9 @@ * compressed in a wave-front pattern if WPP is enabled. Row based loop * filters runs behind the CTU compression and reconstruction */ - for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) - { + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) m_rows[m_sliceBaseRow[sliceId]].active = true; - } + if (m_param->bEnableWavefront) { int i = 0; @@ -719,6 +742,7 @@ } } } + if (m_param->bEnableWavefront) { for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++) @@ -751,6 +775,7 @@ m_mref[l][ref].applyWeight(rowIdx, m_numRows, sliceEndRow, sliceId); } } + enableRowEncoder(m_row_to_idx[row]); /* clear external dependency for this row */ if (!rowInSlice) { @@ -980,9 +1005,8 @@ // complete the slice header by writing WPP row-starts m_entropyCoder.setBitstream(&m_bs); if (slice->m_pps->bEntropyCodingSyncEnabled) - { m_entropyCoder.codeSliceHeaderWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow - 1), maxStreamSize); - } + m_bs.writeByteAlignment(); m_nalList.serialize(slice->m_nalUnitType, m_bs); @@ -1211,17 +1235,21 @@ int64_t startTime = x265_mdate(); if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime) m_totalNoWorkerTime += x265_mdate() - m_stallStartTime; + const uint32_t realRow = m_idx_to_row[row >> 1]; const uint32_t typeNum = m_idx_to_row[row & 1]; + if (!typeNum) processRowEncoder(realRow, m_tld[threadId]); else { m_frameFilter.processRow(realRow); + // NOTE: Active next row if (realRow != m_sliceBaseRow[m_rows[realRow].sliceId + 1] - 1) enqueueRowFilter(m_row_to_idx[realRow + 1]); } + if (ATOMIC_DEC(&m_activeWorkerCount) == 0) m_stallStartTime = x265_mdate(); @@ -1264,20 +1292,18 @@ const uint32_t lineStartCUAddr = row * numCols; bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0; + const uint32_t sliceId = curRow.sliceId; uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16; - uint32_t 
maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1)) / 16; uint32_t noOfBlocks = m_param->maxCUSize / 16; const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - 1].sliceId != curRow.sliceId)) ? 1 : 0; const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0; - const uint32_t sliceId = curRow.sliceId; const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1]; const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; - if (bFirstRowInSlice && !curRow.completed) - { - // Load SBAC coder context from previous row and initialize row state. - rowCoder.load(m_initSliceContext); - } + // Load SBAC coder context from previous row and initialize row state. + if (bFirstRowInSlice && !curRow.completed) + rowCoder.load(m_initSliceContext); + // calculate mean QP for consistent deltaQP signalling calculation if (m_param->bOptCUDeltaQP) { @@ -1287,15 +1313,12 @@ if (m_param->bEnableWavefront || !row) { double meanQPOff = 0; - uint32_t loopIncr, count = 0; bool isReferenced = IS_REFERENCED(m_frame); double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset; if (qpoffs) { - if (m_param->rc.qgSize == 8) - loopIncr = 8; - else - loopIncr = 16; + uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 
8 : 16; + uint32_t cuYStart = 0, height = m_frame->m_fencPic->m_picHeight; if (m_param->bEnableWavefront) { @@ -1305,6 +1328,7 @@ uint32_t qgSize = m_param->rc.qgSize, width = m_frame->m_fencPic->m_picWidth; uint32_t maxOffsetCols = (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr; + uint32_t count = 0; for (uint32_t cuY = cuYStart; cuY < height && (cuY < m_frame->m_fencPic->m_picHeight); cuY += qgSize) { for (uint32_t cuX = 0; cuX < width; cuX += qgSize) @@ -1336,7 +1360,8 @@ } curRow.avgQPComputed = 1; } - } + } + // Initialize restrict on MV range in slices tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * m_param->maxCUSize * 4) + 3 * 4; tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) * (m_param->maxCUSize * 4) - 4 * 4); @@ -1364,16 +1389,16 @@ curRow.bufferedEntropy.copyState(rowCoder); curRow.bufferedEntropy.loadContexts(rowCoder); } - if (!row && m_vbvResetTriggerRow != intRow) + if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow) { curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc; curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(curEncData.m_avgQpRc); } FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; - if (m_param->bEnableWavefront && row >= col && row && m_vbvResetTriggerRow != intRow) + if (m_param->bEnableWavefront && rowInSlice >= col && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow) cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp; - else if (!m_param->bEnableWavefront && row && m_vbvResetTriggerRow != intRow) + else if (!m_param->bEnableWavefront && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow) cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp; else cuStat.baseQp = curEncData.m_rowStat[row].rowQp; @@ -1385,7 +1410,8 @@ { cuStat.vbvCost = 0; cuStat.intraVbvCost = 0; - for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++) + + for (uint32_t h = 0; h < noOfBlocks && block_y < m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++) { uint32_t idx = 
block_x + (block_y * maxBlockCols); @@ -1433,11 +1459,12 @@ { // NOTE: in VBV mode, we may reencode anytime, so we can't do Deblock stage-Horizon and SAO if (!bIsVbv) - { + { // Delay one row to avoid intra prediction conflict if (m_pool && !bFirstRowInSlice) - { + { int allowCol = col; + // avoid race condition on last column if (rowInSlice >= 2) { @@ -1446,11 +1473,13 @@ } m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol); } + // Last Row may start early if (m_pool && bLastRowInSlice) { // Deblocking last row int allowCol = col; + // avoid race condition on last column if (rowInSlice >= 2) { @@ -1472,6 +1501,7 @@ FrameStats frameLog; curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, &frameLog); + // copy number of intra, inter cu per row into frame stats for 2 pass if (m_param->rc.bStatWrite) { @@ -1485,10 +1515,8 @@ int shift = 2 * (m_param->maxCUDepth - depth); int cuSize = m_param->maxCUSize >> depth; - if (cuSize == 8) - curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN); - else - curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] << shift); + curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? 
(int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) : + (int)(frameLog.cntIntra[depth] << shift); curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift); curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << shift); @@ -1518,12 +1546,13 @@ if (bIsVbv) { // Update encoded bits, satdCost, baseQP for each CU if tune grain is disabled - if ((m_param->bEnableWavefront && (!cuAddr || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront) + FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; + if ((m_param->bEnableWavefront && ((cuAddr == m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront) { - curEncData.m_rowStat[row].rowSatd += curEncData.m_cuStat[cuAddr].vbvCost; - curEncData.m_rowStat[row].rowIntraSatd += curEncData.m_cuStat[cuAddr].intraVbvCost; - curEncData.m_rowStat[row].encodedBits += curEncData.m_cuStat[cuAddr].totalBits; - curEncData.m_rowStat[row].sumQpRc += curEncData.m_cuStat[cuAddr].baseQp; + curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost; + curEncData.m_rowStat[row].rowIntraSatd += cuStat.intraVbvCost; + curEncData.m_rowStat[row].encodedBits += cuStat.totalBits; + curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp; curEncData.m_rowStat[row].numEncodedCUs = cuAddr; } @@ -1531,7 +1560,7 @@ if (!m_param->bEnableWavefront && col == numCols - 1) { double qpBase = curEncData.m_cuStat[cuAddr].baseQp; - int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase); + int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId); qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase); curEncData.m_rowStat[row].rowQp = qpBase; curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase); @@ -1557,15 +1586,16 @@ } } // If current block is at row diagonal checkpoint, call vbv ratecontrol. 
- else if (m_param->bEnableWavefront && row == col && row) + else if (m_param->bEnableWavefront && rowInSlice == col && !bFirstRowInSlice) { if (m_param->rc.bEnableConstVbv) { - int32_t startCuAddr = numCols * row; - int32_t EndCuAddr = startCuAddr + col; - for (int32_t r = row; r >= 0; r--) + uint32_t startCuAddr = numCols * row; + uint32_t EndCuAddr = startCuAddr + col; + + for (int32_t r = row; r >= (int32_t)m_sliceBaseRow[sliceId]; r--) { - for (int32_t c = startCuAddr; c <= EndCuAddr && c <= (int32_t)numCols * (r + 1) - 1; c++) + for (uint32_t c = startCuAddr; c <= EndCuAddr && c <= numCols * (r + 1) - 1; c++) { curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost; curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost; @@ -1578,10 +1608,10 @@ } } double qpBase = curEncData.m_cuStat[cuAddr].baseQp; - int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase); + int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId); qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase); curEncData.m_rowStat[row].rowQp = qpBase; - curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase); + curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase); if (reEncode < 0) { @@ -1592,7 +1622,7 @@ m_vbvResetTriggerRow = row; m_bAllRowsStop = true; - for (uint32_t r = m_numRows - 1; r >= row; r--) + for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r >= row; r--) { CTURow& stopRow = m_rows[r]; @@ -1670,14 +1700,15 @@ return; } } + /* this row of CTUs has been compressed */ if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv) { - if (row == m_numRows - 1) + if (bLastRowInSlice) { - for (int32_t r = 0; r < (int32_t)m_numRows; r++) + for (uint32_t r = m_sliceBaseRow[sliceId]; r < m_sliceBaseRow[sliceId + 1]; r++) { - for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs + 1; c < (int32_t)numCols * (r + 1); c++) + for (uint32_t c = 
curEncData.m_rowStat[r].numEncodedCUs + 1; c < numCols * (r + 1); c++) { curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost; curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost; @@ -1695,26 +1726,41 @@ * after half the frame is encoded, but after this initial period we update * after refLagRows (the number of rows reference frames must have completed * before referencees may begin encoding) */ - uint32_t rowCount = 0; if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv) { + uint32_t rowCount = 0; + uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - m_sliceBaseRow[sliceId]; if (!m_rce.encodeOrder) - rowCount = m_numRows - 1; + rowCount = maxRows - 1; else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / m_param->fpsDenom)) - rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1); + rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1); else - rowCount = X265_MIN(m_refLagRows, m_numRows - 1); - if (row == rowCount) + rowCount = X265_MIN(m_refLagRows, maxRows - 1); + + if (rowInSlice == rowCount / m_param->maxSlices) { - m_rce.rowTotalBits = 0; + m_rowSliceTotalBits[sliceId] = 0; if (bIsVbv) - for (uint32_t i = 0; i < rowCount; i++) - m_rce.rowTotalBits += curEncData.m_rowStat[i].encodedBits; + { + for (uint32_t i = m_sliceBaseRow[sliceId]; i < (rowCount / m_param->maxSlices) + m_sliceBaseRow[sliceId]; i++) + m_rowSliceTotalBits[sliceId] += curEncData.m_rowStat[i].encodedBits; + } else - for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols; cuAddr++) - m_rce.rowTotalBits += curEncData.m_cuStat[cuAddr].totalBits; + { + uint32_t startAddr = rowCount * numCols * sliceId; + uint32_t finishAddr = startAddr + rowCount * numCols; + + for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; cuAddr++) + m_rowSliceTotalBits[sliceId] += curEncData.m_cuStat[cuAddr].totalBits; + } - m_top->m_rateControl->rateControlUpdateStats(&m_rce); + if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices) + { + m_rce.rowTotalBits = 0; + for 
(uint32_t i = 0; i < m_param->maxSlices; i++) + m_rce.rowTotalBits += m_rowSliceTotalBits[i]; + m_top->m_rateControl->rateControlUpdateStats(&m_rce); + } } } @@ -1742,11 +1788,13 @@ if (rowInSlice >= m_filterRowDelay) { enableRowFilter(m_row_to_idx[row - m_filterRowDelay]); + /* NOTE: Activate filter if first row (row 0) */ if (rowInSlice == m_filterRowDelay) enqueueRowFilter(m_row_to_idx[row - m_filterRowDelay]); tryWakeOne(); } + if (bLastRowInSlice) { for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i < endRowInSlicePlus1; i++) diff -r e62b12bd8b45 -r 0882827c33cc source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Thu Jun 29 13:13:56 2017 +0530 +++ b/source/encoder/frameencoder.h Fri Sep 22 20:20:58 2017 +0530 @@ -138,6 +138,7 @@ volatile bool m_bAllRowsStop; volatile int m_completionCount; volatile int m_vbvResetTriggerRow; + volatile int m_sliceCnt; uint32_t m_numRows; uint32_t m_numCols; @@ -147,8 +148,10 @@ CTURow* m_rows; uint16_t m_sliceAddrBits; - uint16_t m_sliceGroupSize; - uint32_t* m_sliceBaseRow; + uint32_t m_sliceGroupSize; + uint32_t* m_sliceBaseRow; + uint32_t* m_sliceMaxBlockRow; + int64_t m_rowSliceTotalBits[2]; RateControlEntry m_rce; SEIDecodedPictureHash m_seiReconPictureDigest; diff -r e62b12bd8b45 -r 0882827c33cc source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cpp Thu Jun 29 13:13:56 2017 +0530 +++ b/source/encoder/ratecontrol.cpp Fri Sep 22 20:20:58 2017 +0530 @@ -732,7 +732,6 @@ m_bitrate = m_param->rc.bitrate * 1000; } - void RateControl::initHRD(SPS& sps) { int vbvBufferSize = m_param->rc.vbvBufferSize * 1000; @@ -765,6 +764,7 @@ #undef MAX_DURATION } + bool RateControl::analyseABR2Pass(uint64_t allAvailableBits) { double rateFactor, stepMult; @@ -1473,6 +1473,7 @@ return q; } + double RateControl::countExpectedBits(int startPos, int endPos) { double expectedBits = 0; @@ -1484,6 +1485,7 @@ } return expectedBits; } + bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int over, int 
endPos) { /* find an interval ending on an overflow or underflow (depending on whether @@ -1531,6 +1533,7 @@ } return adjusted; } + bool RateControl::cuTreeReadFor2Pass(Frame* frame) { int index = m_encOrder[frame->m_poc]; @@ -1579,24 +1582,24 @@ double RateControl::tuneAbrQScaleFromFeedback(double qScale) { double abrBuffer = 2 * m_rateTolerance * m_bitrate; - /* use framesDone instead of POC as poc count is not serial with bframes enabled */ - double overflow = 1.0; - double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration; - double wantedBits = timeDone * m_bitrate; - int64_t encodedBits = m_totalBits; - if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) - { - abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); - encodedBits = m_encodedBits; - } + /* use framesDone instead of POC as poc count is not serial with bframes enabled */ + double overflow = 1.0; + double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration; + double wantedBits = timeDone * m_bitrate; + int64_t encodedBits = m_totalBits; + if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) + { + abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); + encodedBits = m_encodedBits; + } - if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames || - m_param->rc.bStrictCbr || m_isGrainEnabled)) - { - abrBuffer *= X265_MAX(1, sqrt(timeDone)); - overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) / abrBuffer); - qScale *= overflow; - } + if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames || + m_param->rc.bStrictCbr || m_isGrainEnabled)) + { + abrBuffer *= X265_MAX(1, sqrt(timeDone)); + overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) / abrBuffer); + qScale *= overflow; + } return qScale; } @@ -2330,17 +2333,18 @@ return totalSatdBits + encodedBitsSoFar; } -int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv) +int 
RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t sliceId) { FrameData& curEncData = *curFrame->m_encData; double qScaleVbv = x265_qp2qScale(qpVbv); uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd; double encodedBits = curEncData.m_rowStat[row].encodedBits; + uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; - if (m_param->bEnableWavefront && row == 1) + if (m_param->bEnableWavefront && rowInSlice == 1) { - rowSatdCost += curEncData.m_rowStat[0].rowSatd; - encodedBits += curEncData.m_rowStat[0].encodedBits; + rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd; + encodedBits += curEncData.m_rowStat[row - 1].encodedBits; } rowSatdCost >>= X265_DEPTH - 8; updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost, encodedBits); @@ -2350,8 +2354,8 @@ if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp) { uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowIntraSatd; - if (m_param->bEnableWavefront && row == 1) - intraRowSatdCost += curEncData.m_rowStat[0].rowIntraSatd; + if (m_param->bEnableWavefront && rowInSlice == 1) + intraRowSatdCost += curEncData.m_rowStat[row - 1].rowIntraSatd; intraRowSatdCost >>= X265_DEPTH - 8; updatePredictor(rce->rowPred[1], qScaleVbv, (double)intraRowSatdCost, encodedBits); } @@ -2376,7 +2380,7 @@ const SPS& sps = *curEncData.m_slice->m_sps; double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight); - if (row < sps.numCuInHeight - 1) + if (row < m_sliceBaseRow[sliceId + 1] - 1) { /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */ double rcTol = bufferLeftPlanned / m_param->frameNumThreads * m_rateTolerance; @@ -2693,8 +2697,8 @@ m_encodedBitsWindow[pos % s_slidingWindowFrames] = actualBits; if(rce->sliceType != I_SLICE) { - int qp = int (rce->qpaRc + 0.5); - m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? 
actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5; + int qp = int (rce->qpaRc + 0.5); + m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5; } curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow; curFrame->m_rcData->cplxrSum = m_cplxrSum; @@ -2779,7 +2783,8 @@ curFrame->m_encData->m_frameStats.percent8x8Skip * m_ncu) < 0) goto writeFailure; } - else{ + else + { RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps; int i, num = rpsWriter->numberOfPictures; char deltaPOC[128]; diff -r e62b12bd8b45 -r 0882827c33cc source/encoder/ratecontrol.h --- a/source/encoder/ratecontrol.h Thu Jun 29 13:13:56 2017 +0530 +++ b/source/encoder/ratecontrol.h Fri Sep 22 20:20:58 2017 +0530 @@ -244,7 +244,7 @@ int rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc); void rateControlUpdateStats(RateControlEntry* rce); int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce, int *filler); - int rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv); + int rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t sliceId); int rateControlSliceType(int frameNum); bool cuTreeReadFor2Pass(Frame* curFrame); void hrdFullness(SEIBufferingPeriod* sei);
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
