Please ignore this patch; I will send an updated patch after review. On Thu, Sep 28, 2017 at 1:27 PM, Ashok Kumar Mishra <[email protected]> wrote:
> Both patches are the same; you can apply the patch on top of my previous two patches. > > On Thu, Sep 28, 2017 at 12:07 PM, Pradeep Ramachandran < > [email protected]> wrote: > >> On Tue, Sep 26, 2017 at 6:49 PM, Ashok Kumar Mishra < >> [email protected]> wrote: >> >>> Please find the attached patch. >>> >> >> This patch is confusing - is this to be applied on top of the previous >> patch (which didn't work), or is this a replacement patch (which didn't >> work either as I can't find the parent)? >> >> >>> >>> On Thu, Sep 21, 2017 at 8:21 PM, <[email protected]> wrote: >>> >>>> # HG changeset patch >>>> # User Ashok Kumar Mishra <[email protected]> >>>> # Date 1506005452 -19800 >>>> # Thu Sep 21 20:20:52 2017 +0530 >>>> # Node ID 546387e0b983ac1d68cda73777b34a122928cd32 >>>> # Parent 71f700844b0b2a9120bfd8a2d1f13e219aa20677 >>>> vbv hanging issue; fix for multiple slices >>>> When multiple slices are enabled, vbv rate control must take care of >>>> correct rows in slices, since multiple slices are encoding >>>> simultaneously. 
>>>> >>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.cpp >>>> --- a/source/encoder/frameencoder.cpp Tue Sep 12 18:13:03 2017 +0530 >>>> +++ b/source/encoder/frameencoder.cpp Thu Sep 21 20:20:52 2017 +0530 >>>> @@ -88,6 +88,7 @@ >>>> delete[] m_outStreams; >>>> delete[] m_backupStreams; >>>> X265_FREE(m_sliceBaseRow); >>>> + X265_FREE(m_sliceMaxBlockRow); >>>> X265_FREE(m_cuGeoms); >>>> X265_FREE(m_ctuGeomMap); >>>> X265_FREE(m_substreamSizes); >>>> @@ -118,6 +119,40 @@ >>>> >>>> m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); >>>> ok &= !!m_sliceBaseRow; >>>> + m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) >>>> / m_param->maxSlices; >>>> + uint32_t sliceGroupSizeAccu = (m_numRows << 8) / >>>> m_param->maxSlices; >>>> + uint32_t rowSum = sliceGroupSizeAccu; >>>> + uint32_t sidx = 0; >>>> + for (uint32_t i = 0; i < m_numRows; i++) >>>> + { >>>> + const uint32_t rowRange = (rowSum >> 8); >>>> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) >>>> + { >>>> + rowSum += sliceGroupSizeAccu; >>>> + m_sliceBaseRow[++sidx] = i; >>>> + } >>>> + } >>>> + X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); >>>> + m_sliceBaseRow[0] = 0; >>>> + m_sliceBaseRow[m_param->maxSlices] = m_numRows; >>>> + >>>> + m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); >>>> + ok &= !!m_sliceMaxBlockRow; >>>> + uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16; >>>> + sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices; >>>> + rowSum = sliceGroupSizeAccu; >>>> + sidx = 0; >>>> + for (uint32_t i = 0; i < maxBlockRows; i++) >>>> + { >>>> + const uint32_t rowRange = (rowSum >> 8); >>>> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) >>>> + { >>>> + rowSum += sliceGroupSizeAccu; >>>> + m_sliceMaxBlockRow[++sidx] = i; >>>> + } >>>> + } >>>> + m_sliceMaxBlockRow[0] = 0; >>>> + m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows; >>>> >>>> /* determine full motion search 
range */ >>>> int range = m_param->searchRange; /* fpel search */ >>>> @@ -341,6 +376,8 @@ >>>> m_completionCount = 0; >>>> m_bAllRowsStop = false; >>>> m_vbvResetTriggerRow = -1; >>>> + m_rowSliceTotalBits[0] = 0; >>>> + m_rowSliceTotalBits[1] = 0; >>>> >>>> m_SSDY = m_SSDU = m_SSDV = 0; >>>> m_ssim = 0; >>>> @@ -550,28 +587,13 @@ >>>> >>>> /* reset entropy coders and compute slice id */ >>>> m_entropyCoder.load(m_initSliceContext); >>>> - const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - >>>> 1) / m_param->maxSlices; >>>> - const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / >>>> m_param->maxSlices; >>>> - m_sliceGroupSize = (uint16_t)sliceGroupSize; >>>> + >>>> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) >>>> + for (uint32_t row = m_sliceBaseRow[sliceId]; row < >>>> m_sliceBaseRow[sliceId + 1]; row++) >>>> + m_rows[row].init(m_initSliceContext, sliceId); >>>> >>>> - uint32_t rowSum = sliceGroupSizeAccu; >>>> - uint32_t sidx = 0; >>>> - for (uint32_t i = 0; i < m_numRows; i++) >>>> - { >>>> - const uint32_t rowRange = (rowSum >> 8); >>>> - >>>> - if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) >>>> - { >>>> - rowSum += sliceGroupSizeAccu; >>>> - m_sliceBaseRow[++sidx] = i; >>>> - } >>>> - >>>> - m_rows[i].init(m_initSliceContext, sidx); >>>> - } >>>> - X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); >>>> - >>>> - m_sliceBaseRow[0] = 0; >>>> - m_sliceBaseRow[m_param->maxSlices] = m_numRows; >>>> + // reset slice counter for rate control update >>>> + m_sliceCnt = 0; >>>> >>>> uint32_t numSubstreams = m_param->bEnableWavefront ? 
>>>> slice->m_sps->numCuInHeight : m_param->maxSlices; >>>> X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == >>>> 1), "Multiple slices without WPP unsupport now!"); >>>> @@ -586,8 +608,10 @@ >>>> m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]); >>>> } >>>> else >>>> + { >>>> for (uint32_t i = 0; i < numSubstreams; i++) >>>> m_outStreams[i].resetBits(); >>>> + } >>>> >>>> int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0; >>>> >>>> @@ -697,10 +721,9 @@ >>>> * compressed in a wave-front pattern if WPP is enabled. Row based >>>> loop >>>> * filters runs behind the CTU compression and reconstruction */ >>>> >>>> - for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) >>>> - { >>>> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) >>>> m_rows[m_sliceBaseRow[sliceId]].active = true; >>>> - } >>>> + >>>> if (m_param->bEnableWavefront) >>>> { >>>> int i = 0; >>>> @@ -982,9 +1005,8 @@ >>>> // complete the slice header by writing WPP row-starts >>>> m_entropyCoder.setBitstream(&m_bs); >>>> if (slice->m_pps->bEntropyCodingSyncEnabled) >>>> - { >>>> m_entropyCoder.codeSliceHeade >>>> rWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow - >>>> prevSliceRow - 1), maxStreamSize); >>>> - } >>>> + >>>> m_bs.writeByteAlignment(); >>>> >>>> m_nalList.serialize(slice->m_nalUnitType, m_bs); >>>> @@ -1270,20 +1292,17 @@ >>>> const uint32_t lineStartCUAddr = row * numCols; >>>> bool bIsVbv = m_param->rc.vbvBufferSize > 0 && >>>> m_param->rc.vbvMaxBitrate > 0; >>>> >>>> + const uint32_t sliceId = curRow.sliceId; >>>> uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - >>>> 1)) / 16; >>>> - uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - >>>> 1)) / 16; >>>> uint32_t noOfBlocks = m_param->maxCUSize / 16; >>>> const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - >>>> 1].sliceId != curRow.sliceId)) ? 
1 : 0; >>>> const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || >>>> (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0; >>>> - const uint32_t sliceId = curRow.sliceId; >>>> const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1]; >>>> const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; >>>> >>>> - if (bFirstRowInSlice && !curRow.completed) >>>> - { >>>> - // Load SBAC coder context from previous row and initialize >>>> row state. >>>> - rowCoder.load(m_initSliceContext); >>>> - } >>>> + // Load SBAC coder context from previous row and initialize row >>>> state. >>>> + if (bFirstRowInSlice && !curRow.completed) >>>> + rowCoder.load(m_initSliceContext); >>>> >>>> // calculate mean QP for consistent deltaQP signalling calculation >>>> if (m_param->bOptCUDeltaQP) >>>> @@ -1294,15 +1313,12 @@ >>>> if (m_param->bEnableWavefront || !row) >>>> { >>>> double meanQPOff = 0; >>>> - uint32_t loopIncr, count = 0; >>>> bool isReferenced = IS_REFERENCED(m_frame); >>>> double *qpoffs = (isReferenced && m_param->rc.cuTree) >>>> ? m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset; >>>> if (qpoffs) >>>> { >>>> - if (m_param->rc.qgSize == 8) >>>> - loopIncr = 8; >>>> - else >>>> - loopIncr = 16; >>>> + uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 
8 >>>> : 16; >>>> + >>>> uint32_t cuYStart = 0, height = >>>> m_frame->m_fencPic->m_picHeight; >>>> if (m_param->bEnableWavefront) >>>> { >>>> @@ -1312,6 +1328,7 @@ >>>> >>>> uint32_t qgSize = m_param->rc.qgSize, width = >>>> m_frame->m_fencPic->m_picWidth; >>>> uint32_t maxOffsetCols = >>>> (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr; >>>> + uint32_t count = 0; >>>> for (uint32_t cuY = cuYStart; cuY < height && (cuY >>>> < m_frame->m_fencPic->m_picHeight); cuY += qgSize) >>>> { >>>> for (uint32_t cuX = 0; cuX < width; cuX += >>>> qgSize) >>>> @@ -1372,16 +1389,16 @@ >>>> curRow.bufferedEntropy.copyState(rowCoder); >>>> curRow.bufferedEntropy.loadContexts(rowCoder); >>>> } >>>> - if (!row && m_vbvResetTriggerRow != intRow) >>>> + if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow) >>>> { >>>> curEncData.m_rowStat[row].rowQp = >>>> curEncData.m_avgQpRc; >>>> curEncData.m_rowStat[row].rowQpScale = >>>> x265_qp2qScale(curEncData.m_avgQpRc); >>>> } >>>> >>>> FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; >>>> - if (m_param->bEnableWavefront && row >= col && row && >>>> m_vbvResetTriggerRow != intRow) >>>> + if (m_param->bEnableWavefront && rowInSlice >= col && >>>> !bFirstRowInSlice && m_vbvResetTriggerRow != intRow) >>>> cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + >>>> 1].baseQp; >>>> - else if (!m_param->bEnableWavefront && row && >>>> m_vbvResetTriggerRow != intRow) >>>> + else if (!m_param->bEnableWavefront && !bFirstRowInSlice >>>> && m_vbvResetTriggerRow != intRow) >>>> cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp; >>>> else >>>> cuStat.baseQp = curEncData.m_rowStat[row].rowQp; >>>> @@ -1393,7 +1410,8 @@ >>>> { >>>> cuStat.vbvCost = 0; >>>> cuStat.intraVbvCost = 0; >>>> - for (uint32_t h = 0; h < noOfBlocks && block_y < >>>> maxBlockRows; h++, block_y++) >>>> + >>>> + for (uint32_t h = 0; h < noOfBlocks && block_y < >>>> m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++) >>>> { >>>> uint32_t idx = block_x + 
(block_y * maxBlockCols); >>>> >>>> @@ -1497,10 +1515,8 @@ >>>> int shift = 2 * (m_param->maxCUDepth - depth); >>>> int cuSize = m_param->maxCUSize >> depth; >>>> >>>> - if (cuSize == 8) >>>> - curRow.rowStats.intra8x8Cnt += >>>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN); >>>> - else >>>> - curRow.rowStats.intra8x8Cnt += >>>> (int)(frameLog.cntIntra[depth] << shift); >>>> + curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? >>>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) : >>>> + >>>> (int)(frameLog.cntIntra[depth] << shift); >>>> >>>> curRow.rowStats.inter8x8Cnt += >>>> (int)(frameLog.cntInter[depth] << shift); >>>> curRow.rowStats.skip8x8Cnt += >>>> (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << >>>> shift); >>>> @@ -1530,12 +1546,13 @@ >>>> if (bIsVbv) >>>> { >>>> // Update encoded bits, satdCost, baseQP for each CU if >>>> tune grain is disabled >>>> - if ((m_param->bEnableWavefront && (!cuAddr || >>>> !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront) >>>> + FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; >>>> + if ((m_param->bEnableWavefront && ((cuAddr == >>>> m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || >>>> !m_param->bEnableWavefront) >>>> { >>>> - curEncData.m_rowStat[row].rowSatd += >>>> curEncData.m_cuStat[cuAddr].vbvCost; >>>> - curEncData.m_rowStat[row].rowIntraSatd += >>>> curEncData.m_cuStat[cuAddr].intraVbvCost; >>>> - curEncData.m_rowStat[row].encodedBits += >>>> curEncData.m_cuStat[cuAddr].totalBits; >>>> - curEncData.m_rowStat[row].sumQpRc += >>>> curEncData.m_cuStat[cuAddr].baseQp; >>>> + curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost; >>>> + curEncData.m_rowStat[row].rowIntraSatd += >>>> cuStat.intraVbvCost; >>>> + curEncData.m_rowStat[row].encodedBits += >>>> cuStat.totalBits; >>>> + curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp; >>>> curEncData.m_rowStat[row].numEncodedCUs = cuAddr; >>>> } >>>> >>>> @@ -1543,7 +1560,7 @@ >>>> if 
(!m_param->bEnableWavefront && col == numCols - 1) >>>> { >>>> double qpBase = curEncData.m_cuStat[cuAddr].baseQp; >>>> - int reEncode = >>>> m_top->m_rateControl->rowVbvRateControl(m_frame, >>>> row, &m_rce, qpBase); >>>> + int reEncode = >>>> m_top->m_rateControl->rowVbvRateControl(m_frame, >>>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId); >>>> qpBase = x265_clip3((double)m_param->rc.qpMin, >>>> (double)m_param->rc.qpMax, qpBase); >>>> curEncData.m_rowStat[row].rowQp = qpBase; >>>> curEncData.m_rowStat[row].rowQpScale = >>>> x265_qp2qScale(qpBase); >>>> @@ -1569,15 +1586,16 @@ >>>> } >>>> } >>>> // If current block is at row diagonal checkpoint, call >>>> vbv ratecontrol. >>>> - else if (m_param->bEnableWavefront && row == col && row) >>>> + else if (m_param->bEnableWavefront && rowInSlice == col && >>>> !bFirstRowInSlice) >>>> { >>>> if (m_param->rc.bEnableConstVbv) >>>> { >>>> - int32_t startCuAddr = numCols * row; >>>> - int32_t EndCuAddr = startCuAddr + col; >>>> - for (int32_t r = row; r >= 0; r--) >>>> + uint32_t startCuAddr = numCols * row; >>>> + uint32_t EndCuAddr = startCuAddr + col; >>>> + >>>> + for (int32_t r = row; r >= >>>> (int32_t)m_sliceBaseRow[sliceId]; r--) >>>> { >>>> - for (int32_t c = startCuAddr; c <= EndCuAddr >>>> && c <= (int32_t)numCols * (r + 1) - 1; c++) >>>> + for (uint32_t c = startCuAddr; c <= EndCuAddr >>>> && c <= numCols * (r + 1) - 1; c++) >>>> { >>>> curEncData.m_rowStat[r].rowSatd += >>>> curEncData.m_cuStat[c].vbvCost; >>>> curEncData.m_rowStat[r].rowIntraSatd += >>>> curEncData.m_cuStat[c].intraVbvCost; >>>> @@ -1590,10 +1608,10 @@ >>>> } >>>> } >>>> double qpBase = curEncData.m_cuStat[cuAddr].baseQp; >>>> - int reEncode = >>>> m_top->m_rateControl->rowVbvRateControl(m_frame, >>>> row, &m_rce, qpBase); >>>> + int reEncode = >>>> m_top->m_rateControl->rowVbvRateControl(m_frame, >>>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId); >>>> qpBase = x265_clip3((double)m_param->rc.qpMin, >>>> (double)m_param->rc.qpMax, qpBase); 
>>>> curEncData.m_rowStat[row].rowQp = qpBase; >>>> - curEncData.m_rowStat[row].rowQpScale = >>>> x265_qp2qScale(qpBase); >>>> + curEncData.m_rowStat[row].rowQpScale = >>>> x265_qp2qScale(qpBase); >>>> >>>> if (reEncode < 0) >>>> { >>>> @@ -1604,7 +1622,7 @@ >>>> m_vbvResetTriggerRow = row; >>>> m_bAllRowsStop = true; >>>> >>>> - for (uint32_t r = m_numRows - 1; r >= row; r--) >>>> + for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; >>>> r >= row; r--) >>>> { >>>> CTURow& stopRow = m_rows[r]; >>>> >>>> @@ -1686,11 +1704,11 @@ >>>> /* this row of CTUs has been compressed */ >>>> if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv) >>>> { >>>> - if (row == m_numRows - 1) >>>> + if (bLastRowInSlice) >>>> { >>>> - for (int32_t r = 0; r < (int32_t)m_numRows; r++) >>>> + for (uint32_t r = m_sliceBaseRow[sliceId]; r < >>>> m_sliceBaseRow[sliceId + 1]; r++) >>>> { >>>> - for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs >>>> + 1; c < (int32_t)numCols * (r + 1); c++) >>>> + for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs >>>> + 1; c < numCols * (r + 1); c++) >>>> { >>>> curEncData.m_rowStat[r].rowSatd += >>>> curEncData.m_cuStat[c].vbvCost; >>>> curEncData.m_rowStat[r].rowIntraSatd += >>>> curEncData.m_cuStat[c].intraVbvCost; >>>> @@ -1708,26 +1726,41 @@ >>>> * after half the frame is encoded, but after this initial period >>>> we update >>>> * after refLagRows (the number of rows reference frames must have >>>> completed >>>> * before referencees may begin encoding) */ >>>> - uint32_t rowCount = 0; >>>> if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv) >>>> { >>>> + uint32_t rowCount = 0; >>>> + uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - >>>> m_sliceBaseRow[sliceId]; >>>> if (!m_rce.encodeOrder) >>>> - rowCount = m_numRows - 1; >>>> + rowCount = maxRows - 1; >>>> else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / >>>> m_param->fpsDenom)) >>>> - rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1); >>>> + rowCount = 
X265_MIN((maxRows + 1) / 2, maxRows - 1); >>>> else >>>> - rowCount = X265_MIN(m_refLagRows, m_numRows - 1); >>>> - if (row == rowCount) >>>> + rowCount = X265_MIN(m_refLagRows, maxRows - 1); >>>> + >>>> + if (rowInSlice == rowCount / m_param->maxSlices) >>>> { >>>> - m_rce.rowTotalBits = 0; >>>> + m_rowSliceTotalBits[sliceId] = 0; >>>> if (bIsVbv) >>>> - for (uint32_t i = 0; i < rowCount; i++) >>>> - m_rce.rowTotalBits += >>>> curEncData.m_rowStat[i].encodedBits; >>>> + { >>>> + for (uint32_t i = m_sliceBaseRow[sliceId]; i < >>>> (rowCount / m_param->maxSlices) + m_sliceBaseRow[sliceId]; i++) >>>> + m_rowSliceTotalBits[sliceId] += >>>> curEncData.m_rowStat[i].encodedBits; >>>> + } >>>> else >>>> - for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols; >>>> cuAddr++) >>>> - m_rce.rowTotalBits += >>>> curEncData.m_cuStat[cuAddr].totalBits; >>>> + { >>>> + uint32_t startAddr = rowCount * numCols * sliceId; >>>> + uint32_t finishAddr = startAddr + rowCount * numCols; >>>> + >>>> + for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; >>>> cuAddr++) >>>> + m_rowSliceTotalBits[sliceId] += >>>> curEncData.m_cuStat[cuAddr].totalBits; >>>> + } >>>> >>>> - m_top->m_rateControl->rateControlUpdateStats(&m_rce); >>>> + if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices) >>>> + { >>>> + m_rce.rowTotalBits = 0; >>>> + for (uint32_t i = 0; i < m_param->maxSlices; i++) >>>> + m_rce.rowTotalBits += m_rowSliceTotalBits[i]; >>>> + m_top->m_rateControl->rateControlUpdateStats(&m_rce); >>>> + } >>>> } >>>> } >>>> >>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.h >>>> --- a/source/encoder/frameencoder.h Tue Sep 12 18:13:03 2017 +0530 >>>> +++ b/source/encoder/frameencoder.h Thu Sep 21 20:20:52 2017 +0530 >>>> @@ -138,6 +138,7 @@ >>>> volatile bool m_bAllRowsStop; >>>> volatile int m_completionCount; >>>> volatile int m_vbvResetTriggerRow; >>>> + volatile int m_sliceCnt; >>>> >>>> uint32_t m_numRows; >>>> uint32_t m_numCols; >>>> @@ -147,8 +148,10 @@ >>>> >>>> 
CTURow* m_rows; >>>> uint16_t m_sliceAddrBits; >>>> - uint16_t m_sliceGroupSize; >>>> - uint32_t* m_sliceBaseRow; >>>> + uint32_t m_sliceGroupSize; >>>> + uint32_t* m_sliceBaseRow; >>>> + uint32_t* m_sliceMaxBlockRow; >>>> + int64_t m_rowSliceTotalBits[2]; >>>> RateControlEntry m_rce; >>>> SEIDecodedPictureHash m_seiReconPictureDigest; >>>> >>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.cpp >>>> --- a/source/encoder/ratecontrol.cpp Tue Sep 12 18:13:03 2017 +0530 >>>> +++ b/source/encoder/ratecontrol.cpp Thu Sep 21 20:20:52 2017 +0530 >>>> @@ -732,7 +732,6 @@ >>>> m_bitrate = m_param->rc.bitrate * 1000; >>>> } >>>> >>>> - >>>> void RateControl::initHRD(SPS& sps) >>>> { >>>> int vbvBufferSize = m_param->rc.vbvBufferSize * 1000; >>>> @@ -765,6 +764,7 @@ >>>> >>>> #undef MAX_DURATION >>>> } >>>> + >>>> bool RateControl::analyseABR2Pass(uint64_t allAvailableBits) >>>> { >>>> double rateFactor, stepMult; >>>> @@ -1473,6 +1473,7 @@ >>>> >>>> return q; >>>> } >>>> + >>>> double RateControl::countExpectedBits(int startPos, int endPos) >>>> { >>>> double expectedBits = 0; >>>> @@ -1484,6 +1485,7 @@ >>>> } >>>> return expectedBits; >>>> } >>>> + >>>> bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int >>>> over, int endPos) >>>> { >>>> /* find an interval ending on an overflow or underflow (depending >>>> on whether >>>> @@ -1531,6 +1533,7 @@ >>>> } >>>> return adjusted; >>>> } >>>> + >>>> bool RateControl::cuTreeReadFor2Pass(Frame* frame) >>>> { >>>> int index = m_encOrder[frame->m_poc]; >>>> @@ -1579,24 +1582,24 @@ >>>> double RateControl::tuneAbrQScaleFromFeedback(double qScale) >>>> { >>>> double abrBuffer = 2 * m_rateTolerance * m_bitrate; >>>> - /* use framesDone instead of POC as poc count is not serial >>>> with bframes enabled */ >>>> - double overflow = 1.0; >>>> - double timeDone = (double)(m_framesDone - >>>> m_param->frameNumThreads + 1) * m_frameDuration; >>>> - double wantedBits = timeDone * m_bitrate; >>>> - int64_t 
encodedBits = m_totalBits; >>>> - if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) >>>> - { >>>> - abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); >>>> - encodedBits = m_encodedBits; >>>> - } >>>> + /* use framesDone instead of POC as poc count is not serial with >>>> bframes enabled */ >>>> + double overflow = 1.0; >>>> + double timeDone = (double)(m_framesDone - m_param->frameNumThreads >>>> + 1) * m_frameDuration; >>>> + double wantedBits = timeDone * m_bitrate; >>>> + int64_t encodedBits = m_totalBits; >>>> + if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) >>>> + { >>>> + abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); >>>> + encodedBits = m_encodedBits; >>>> + } >>>> >>>> - if (wantedBits > 0 && encodedBits > 0 && >>>> (!m_partialResidualFrames || >>>> - m_param->rc.bStrictCbr || m_isGrainEnabled)) >>>> - { >>>> - abrBuffer *= X265_MAX(1, sqrt(timeDone)); >>>> - overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - >>>> wantedBits) / abrBuffer); >>>> - qScale *= overflow; >>>> - } >>>> + if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames >>>> || >>>> + m_param->rc.bStrictCbr || m_isGrainEnabled)) >>>> + { >>>> + abrBuffer *= X265_MAX(1, sqrt(timeDone)); >>>> + overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - >>>> wantedBits) / abrBuffer); >>>> + qScale *= overflow; >>>> + } >>>> return qScale; >>>> } >>>> >>>> @@ -2330,17 +2333,18 @@ >>>> return totalSatdBits + encodedBitsSoFar; >>>> } >>>> >>>> -int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, >>>> RateControlEntry* rce, double& qpVbv) >>>> +int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, >>>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t >>>> sliceId) >>>> { >>>> FrameData& curEncData = *curFrame->m_encData; >>>> double qScaleVbv = x265_qp2qScale(qpVbv); >>>> uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd; >>>> double encodedBits = curEncData.m_rowStat[row].encodedBits; >>>> + 
uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; >>>> >>>> - if (m_param->bEnableWavefront && row == 1) >>>> + if (m_param->bEnableWavefront && rowInSlice == 1) >>>> { >>>> - rowSatdCost += curEncData.m_rowStat[0].rowSatd; >>>> - encodedBits += curEncData.m_rowStat[0].encodedBits; >>>> + rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd; >>>> + encodedBits += curEncData.m_rowStat[row - 1].encodedBits; >>> >>> } >>>> rowSatdCost >>= X265_DEPTH - 8; >>>> updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost, >>>> encodedBits); >>>> @@ -2350,8 +2354,8 @@ >>>> if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp) >>>> { >>>> uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowI >>>> ntraSatd; >>>> - if (m_param->bEnableWavefront && row == 1) >>>> - intraRowSatdCost += curEncData.m_rowStat[0].rowInt >>>> raSatd; >>>> + if (m_param->bEnableWavefront && rowInSlice == 1) >>>> + intraRowSatdCost += curEncData.m_rowStat[row - >>>> 1].rowIntraSatd; >>>> intraRowSatdCost >>= X265_DEPTH - 8; >>>> updatePredictor(rce->rowPred[1], qScaleVbv, >>>> (double)intraRowSatdCost, encodedBits); >>>> } >>>> @@ -2376,7 +2380,7 @@ >>>> const SPS& sps = *curEncData.m_slice->m_sps; >>>> double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight); >>>> >>>> - if (row < sps.numCuInHeight - 1) >>>> + if (row < m_sliceBaseRow[sliceId + 1] - 1) >>>> { >>>> /* More threads means we have to be more cautious in letting >>>> ratecontrol use up extra bits. */ >>>> double rcTol = bufferLeftPlanned / m_param->frameNumThreads * >>>> m_rateTolerance; >>>> @@ -2693,8 +2697,8 @@ >>>> m_encodedBitsWindow[pos % s_slidingWindowFrames] = >>>> actualBits; >>>> if(rce->sliceType != I_SLICE) >>>> { >>>> - int qp = int (rce->qpaRc + 0.5); >>>> - m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? >>>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5; >>>> + int qp = int (rce->qpaRc + 0.5); >>>> + m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? 
>>>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5; >>>> } >>>> curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow; >>>> curFrame->m_rcData->cplxrSum = m_cplxrSum; >>>> @@ -2779,7 +2783,8 @@ >>>> curFrame->m_encData->m_frameStats.percent8x8Skip * >>>> m_ncu) < 0) >>>> goto writeFailure; >>>> } >>>> - else{ >>>> + else >>>> + { >>>> RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps; >>>> int i, num = rpsWriter->numberOfPictures; >>>> char deltaPOC[128]; >>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.h >>>> --- a/source/encoder/ratecontrol.h Tue Sep 12 18:13:03 2017 +0530 >>>> +++ b/source/encoder/ratecontrol.h Thu Sep 21 20:20:52 2017 +0530 >>>> @@ -244,7 +244,7 @@ >>>> int rateControlStart(Frame* curFrame, RateControlEntry* rce, >>>> Encoder* enc); >>>> void rateControlUpdateStats(RateControlEntry* rce); >>>> int rateControlEnd(Frame* curFrame, int64_t bits, >>>> RateControlEntry* rce, int *filler); >>>> - int rowVbvRateControl(Frame* curFrame, uint32_t row, >>>> RateControlEntry* rce, double& qpVbv); >>>> + int rowVbvRateControl(Frame* curFrame, uint32_t row, >>>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t >>>> sliceId); >>>> int rateControlSliceType(int frameNum); >>>> bool cuTreeReadFor2Pass(Frame* curFrame); >>>> void hrdFullness(SEIBufferingPeriod* sei); >>>> >>> >>> >>> _______________________________________________ >>> x265-devel mailing list >>> [email protected] >>> https://mailman.videolan.org/listinfo/x265-devel >>> >>> >> >> _______________________________________________ >> x265-devel mailing list >> [email protected] >> https://mailman.videolan.org/listinfo/x265-devel >> >> >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
