Yes, sending it once again. On Thu, Sep 28, 2017 at 2:59 PM, Pradeep Ramachandran <[email protected]> wrote:
> > On Thu, Sep 28, 2017 at 2:16 PM, <[email protected]> wrote: > >> # HG changeset patch >> # User Ashok Kumar Mishra <[email protected]> >> # Date 1506091858 -19800 >> # Fri Sep 22 20:20:58 2017 +0530 >> # Node ID c838e60c7c6ba0ab07e2d4130a5c2ba22e0b1eea >> # Parent e62b12bd8b4573b15290ebf110e01c8fafce55be >> vbv hanging issue; fix for multiple slices >> When multiple slices are enabled, vbv rate control must take care of >> correct rows in slices, since multiple slices are encoding simultaneously. >> >> > This patch doesn't apply on the current tip of the default branch. Please > fix and resend. > > >> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/frameencoder.cpp >> --- a/source/encoder/frameencoder.cpp Thu Jun 29 13:13:56 2017 +0530 >> +++ b/source/encoder/frameencoder.cpp Fri Sep 22 20:20:58 2017 +0530 >> @@ -88,6 +88,7 @@ >> delete[] m_outStreams; >> delete[] m_backupStreams; >> X265_FREE(m_sliceBaseRow); >> + X265_FREE(m_sliceMaxBlockRow); >> X265_FREE(m_cuGeoms); >> X265_FREE(m_ctuGeomMap); >> X265_FREE(m_substreamSizes); >> @@ -118,6 +119,40 @@ >> >> m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); >> ok &= !!m_sliceBaseRow; >> + m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / >> m_param->maxSlices; >> + uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices; >> + uint32_t rowSum = sliceGroupSizeAccu; >> + uint32_t sidx = 0; >> + for (uint32_t i = 0; i < m_numRows; i++) >> + { >> + const uint32_t rowRange = (rowSum >> 8); >> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) >> + { >> + rowSum += sliceGroupSizeAccu; >> + m_sliceBaseRow[++sidx] = i; >> + } >> + } >> + X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); >> + m_sliceBaseRow[0] = 0; >> + m_sliceBaseRow[m_param->maxSlices] = m_numRows; >> + >> + m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1); >> + ok &= !!m_sliceMaxBlockRow; >> + uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16; >> + 
sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices; >> + rowSum = sliceGroupSizeAccu; >> + sidx = 0; >> + for (uint32_t i = 0; i < maxBlockRows; i++) >> + { >> + const uint32_t rowRange = (rowSum >> 8); >> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) >> + { >> + rowSum += sliceGroupSizeAccu; >> + m_sliceMaxBlockRow[++sidx] = i; >> + } >> + } >> + m_sliceMaxBlockRow[0] = 0; >> + m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows; >> >> /* determine full motion search range */ >> int range = m_param->searchRange; /* fpel search */ >> @@ -341,6 +376,8 @@ >> m_completionCount = 0; >> m_bAllRowsStop = false; >> m_vbvResetTriggerRow = -1; >> + m_rowSliceTotalBits[0] = 0; >> + m_rowSliceTotalBits[1] = 0; >> >> m_SSDY = m_SSDU = m_SSDV = 0; >> m_ssim = 0; >> @@ -550,28 +587,13 @@ >> >> /* reset entropy coders and compute slice id */ >> m_entropyCoder.load(m_initSliceContext); >> - const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1) >> / m_param->maxSlices; >> - const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / >> m_param->maxSlices; >> - m_sliceGroupSize = (uint16_t)sliceGroupSize; >> + >> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) >> + for (uint32_t row = m_sliceBaseRow[sliceId]; row < >> m_sliceBaseRow[sliceId + 1]; row++) >> + m_rows[row].init(m_initSliceContext, sliceId); >> >> - uint32_t rowSum = sliceGroupSizeAccu; >> - uint32_t sidx = 0; >> - for (uint32_t i = 0; i < m_numRows; i++) >> - { >> - const uint32_t rowRange = (rowSum >> 8); >> - >> - if ((i >= rowRange) & (sidx != m_param->maxSlices - 1)) >> - { >> - rowSum += sliceGroupSizeAccu; >> - m_sliceBaseRow[++sidx] = i; >> - } >> - >> - m_rows[i].init(m_initSliceContext, sidx); >> - } >> - X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); >> - >> - m_sliceBaseRow[0] = 0; >> - m_sliceBaseRow[m_param->maxSlices] = m_numRows; >> + // reset slice counter for rate control update >> + m_sliceCnt = 0; >> >> uint32_t numSubstreams = 
m_param->bEnableWavefront ? >> slice->m_sps->numCuInHeight : m_param->maxSlices; >> X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), >> "Multiple slices without WPP unsupport now!"); >> @@ -586,8 +608,10 @@ >> m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]); >> } >> else >> + { >> for (uint32_t i = 0; i < numSubstreams; i++) >> m_outStreams[i].resetBits(); >> + } >> >> int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0; >> >> @@ -697,10 +721,9 @@ >> * compressed in a wave-front pattern if WPP is enabled. Row based >> loop >> * filters runs behind the CTU compression and reconstruction */ >> >> - for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) >> - { >> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++) >> m_rows[m_sliceBaseRow[sliceId]].active = true; >> - } >> + >> if (m_param->bEnableWavefront) >> { >> int i = 0; >> @@ -719,6 +742,7 @@ >> } >> } >> } >> + >> if (m_param->bEnableWavefront) >> { >> for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; >> rowInSlice++) >> @@ -751,6 +775,7 @@ >> m_mref[l][ref].applyWeight(rowIdx, >> m_numRows, sliceEndRow, sliceId); >> } >> } >> + >> enableRowEncoder(m_row_to_idx[row]); /* clear external >> dependency for this row */ >> if (!rowInSlice) >> { >> @@ -980,9 +1005,8 @@ >> // complete the slice header by writing WPP row-starts >> m_entropyCoder.setBitstream(&m_bs); >> if (slice->m_pps->bEntropyCodingSyncEnabled) >> - { >> >> m_entropyCoder.codeSliceHeaderWPPEntryPoints(&m_substreamSizes[prevSliceRow], >> (nextSliceRow - prevSliceRow - 1), maxStreamSize); >> - } >> + >> m_bs.writeByteAlignment(); >> >> m_nalList.serialize(slice->m_nalUnitType, m_bs); >> @@ -1211,17 +1235,21 @@ >> int64_t startTime = x265_mdate(); >> if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime) >> m_totalNoWorkerTime += x265_mdate() - m_stallStartTime; >> + >> const uint32_t realRow = m_idx_to_row[row >> 1]; >> const uint32_t typeNum = m_idx_to_row[row & 1]; >> + 
>> if (!typeNum) >> processRowEncoder(realRow, m_tld[threadId]); >> else >> { >> m_frameFilter.processRow(realRow); >> + >> // NOTE: Active next row >> if (realRow != m_sliceBaseRow[m_rows[realRow].sliceId + 1] - 1) >> enqueueRowFilter(m_row_to_idx[realRow + 1]); >> } >> + >> if (ATOMIC_DEC(&m_activeWorkerCount) == 0) >> m_stallStartTime = x265_mdate(); >> >> @@ -1264,20 +1292,18 @@ >> const uint32_t lineStartCUAddr = row * numCols; >> bool bIsVbv = m_param->rc.vbvBufferSize > 0 && >> m_param->rc.vbvMaxBitrate > 0; >> >> + const uint32_t sliceId = curRow.sliceId; >> uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) >> / 16; >> - uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - >> 1)) / 16; >> uint32_t noOfBlocks = m_param->maxCUSize / 16; >> const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - >> 1].sliceId != curRow.sliceId)) ? 1 : 0; >> const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || >> (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0; >> - const uint32_t sliceId = curRow.sliceId; >> const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1]; >> const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; >> >> - if (bFirstRowInSlice && !curRow.completed) >> - { >> - // Load SBAC coder context from previous row and initialize row >> state. >> - rowCoder.load(m_initSliceContext); >> - } >> + // Load SBAC coder context from previous row and initialize row >> state. >> + if (bFirstRowInSlice && !curRow.completed) >> + rowCoder.load(m_initSliceContext); >> + >> // calculate mean QP for consistent deltaQP signalling calculation >> if (m_param->bOptCUDeltaQP) >> { >> @@ -1287,15 +1313,12 @@ >> if (m_param->bEnableWavefront || !row) >> { >> double meanQPOff = 0; >> - uint32_t loopIncr, count = 0; >> bool isReferenced = IS_REFERENCED(m_frame); >> double *qpoffs = (isReferenced && m_param->rc.cuTree) ? 
>> m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset; >> if (qpoffs) >> { >> - if (m_param->rc.qgSize == 8) >> - loopIncr = 8; >> - else >> - loopIncr = 16; >> + uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : >> 16; >> + >> uint32_t cuYStart = 0, height = >> m_frame->m_fencPic->m_picHeight; >> if (m_param->bEnableWavefront) >> { >> @@ -1305,6 +1328,7 @@ >> >> uint32_t qgSize = m_param->rc.qgSize, width = >> m_frame->m_fencPic->m_picWidth; >> uint32_t maxOffsetCols = >> (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr; >> + uint32_t count = 0; >> for (uint32_t cuY = cuYStart; cuY < height && (cuY < >> m_frame->m_fencPic->m_picHeight); cuY += qgSize) >> { >> for (uint32_t cuX = 0; cuX < width; cuX += >> qgSize) >> @@ -1336,7 +1360,8 @@ >> } >> curRow.avgQPComputed = 1; >> } >> - } >> + } >> + >> // Initialize restrict on MV range in slices >> tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * >> m_param->maxCUSize * 4) + 3 * 4; >> tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) >> * (m_param->maxCUSize * 4) - 4 * 4); >> @@ -1364,16 +1389,16 @@ >> curRow.bufferedEntropy.copyState(rowCoder); >> curRow.bufferedEntropy.loadContexts(rowCoder); >> } >> - if (!row && m_vbvResetTriggerRow != intRow) >> + if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow) >> { >> curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc; >> curEncData.m_rowStat[row].rowQpScale = >> x265_qp2qScale(curEncData.m_avgQpRc); >> } >> >> FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; >> - if (m_param->bEnableWavefront && row >= col && row && >> m_vbvResetTriggerRow != intRow) >> + if (m_param->bEnableWavefront && rowInSlice >= col && >> !bFirstRowInSlice && m_vbvResetTriggerRow != intRow) >> cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + >> 1].baseQp; >> - else if (!m_param->bEnableWavefront && row && >> m_vbvResetTriggerRow != intRow) >> + else if (!m_param->bEnableWavefront && !bFirstRowInSlice && >> m_vbvResetTriggerRow != 
intRow) >> cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp; >> else >> cuStat.baseQp = curEncData.m_rowStat[row].rowQp; >> @@ -1385,7 +1410,8 @@ >> { >> cuStat.vbvCost = 0; >> cuStat.intraVbvCost = 0; >> - for (uint32_t h = 0; h < noOfBlocks && block_y < >> maxBlockRows; h++, block_y++) >> + >> + for (uint32_t h = 0; h < noOfBlocks && block_y < >> m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++) >> { >> uint32_t idx = block_x + (block_y * maxBlockCols); >> >> @@ -1433,11 +1459,12 @@ >> { >> // NOTE: in VBV mode, we may reencode anytime, so we can't >> do Deblock stage-Horizon and SAO >> if (!bIsVbv) >> - { >> + { >> // Delay one row to avoid intra prediction conflict >> if (m_pool && !bFirstRowInSlice) >> - { >> + { >> int allowCol = col; >> + >> // avoid race condition on last column >> if (rowInSlice >= 2) >> { >> @@ -1446,11 +1473,13 @@ >> } >> m_frameFilter.m_parallelFilter[row - >> 1].m_allowedCol.set(allowCol); >> } >> + >> // Last Row may start early >> if (m_pool && bLastRowInSlice) >> { >> // Deblocking last row >> int allowCol = col; >> + >> // avoid race condition on last column >> if (rowInSlice >= 2) >> { >> @@ -1472,6 +1501,7 @@ >> >> FrameStats frameLog; >> curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, >> &frameLog); >> + >> // copy number of intra, inter cu per row into frame stats for 2 >> pass >> if (m_param->rc.bStatWrite) >> { >> @@ -1485,10 +1515,8 @@ >> int shift = 2 * (m_param->maxCUDepth - depth); >> int cuSize = m_param->maxCUSize >> depth; >> >> - if (cuSize == 8) >> - curRow.rowStats.intra8x8Cnt += >> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN); >> - else >> - curRow.rowStats.intra8x8Cnt += >> (int)(frameLog.cntIntra[depth] << shift); >> + curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? 
>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) : >> + >> (int)(frameLog.cntIntra[depth] << shift); >> >> curRow.rowStats.inter8x8Cnt += >> (int)(frameLog.cntInter[depth] << shift); >> curRow.rowStats.skip8x8Cnt += >> (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << shift); >> @@ -1518,12 +1546,13 @@ >> if (bIsVbv) >> { >> // Update encoded bits, satdCost, baseQP for each CU if tune >> grain is disabled >> - if ((m_param->bEnableWavefront && (!cuAddr || >> !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront) >> + FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr]; >> + if ((m_param->bEnableWavefront && ((cuAddr == >> m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || >> !m_param->bEnableWavefront) >> { >> - curEncData.m_rowStat[row].rowSatd += >> curEncData.m_cuStat[cuAddr].vbvCost; >> - curEncData.m_rowStat[row].rowIntraSatd += >> curEncData.m_cuStat[cuAddr].intraVbvCost; >> - curEncData.m_rowStat[row].encodedBits += >> curEncData.m_cuStat[cuAddr].totalBits; >> - curEncData.m_rowStat[row].sumQpRc += >> curEncData.m_cuStat[cuAddr].baseQp; >> + curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost; >> + curEncData.m_rowStat[row].rowIntraSatd += >> cuStat.intraVbvCost; >> + curEncData.m_rowStat[row].encodedBits += >> cuStat.totalBits; >> + curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp; >> curEncData.m_rowStat[row].numEncodedCUs = cuAddr; >> } >> >> @@ -1531,7 +1560,7 @@ >> if (!m_param->bEnableWavefront && col == numCols - 1) >> { >> double qpBase = curEncData.m_cuStat[cuAddr].baseQp; >> - int reEncode = >> m_top->m_rateControl->rowVbvRateControl(m_frame, >> row, &m_rce, qpBase); >> + int reEncode = >> m_top->m_rateControl->rowVbvRateControl(m_frame, >> row, &m_rce, qpBase, m_sliceBaseRow, sliceId); >> qpBase = x265_clip3((double)m_param->rc.qpMin, >> (double)m_param->rc.qpMax, qpBase); >> curEncData.m_rowStat[row].rowQp = qpBase; >> curEncData.m_rowStat[row].rowQpScale = >> x265_qp2qScale(qpBase); >> 
@@ -1557,15 +1586,16 @@ >> } >> } >> // If current block is at row diagonal checkpoint, call vbv >> ratecontrol. >> - else if (m_param->bEnableWavefront && row == col && row) >> + else if (m_param->bEnableWavefront && rowInSlice == col && >> !bFirstRowInSlice) >> { >> if (m_param->rc.bEnableConstVbv) >> { >> - int32_t startCuAddr = numCols * row; >> - int32_t EndCuAddr = startCuAddr + col; >> - for (int32_t r = row; r >= 0; r--) >> + uint32_t startCuAddr = numCols * row; >> + uint32_t EndCuAddr = startCuAddr + col; >> + >> + for (int32_t r = row; r >= >> (int32_t)m_sliceBaseRow[sliceId]; r--) >> { >> - for (int32_t c = startCuAddr; c <= EndCuAddr && >> c <= (int32_t)numCols * (r + 1) - 1; c++) >> + for (uint32_t c = startCuAddr; c <= EndCuAddr && >> c <= numCols * (r + 1) - 1; c++) >> { >> curEncData.m_rowStat[r].rowSatd += >> curEncData.m_cuStat[c].vbvCost; >> curEncData.m_rowStat[r].rowIntraSatd += >> curEncData.m_cuStat[c].intraVbvCost; >> @@ -1578,10 +1608,10 @@ >> } >> } >> double qpBase = curEncData.m_cuStat[cuAddr].baseQp; >> - int reEncode = >> m_top->m_rateControl->rowVbvRateControl(m_frame, >> row, &m_rce, qpBase); >> + int reEncode = >> m_top->m_rateControl->rowVbvRateControl(m_frame, >> row, &m_rce, qpBase, m_sliceBaseRow, sliceId); >> qpBase = x265_clip3((double)m_param->rc.qpMin, >> (double)m_param->rc.qpMax, qpBase); >> curEncData.m_rowStat[row].rowQp = qpBase; >> - curEncData.m_rowStat[row].rowQpScale = >> x265_qp2qScale(qpBase); >> + curEncData.m_rowStat[row].rowQpScale = >> x265_qp2qScale(qpBase); >> >> if (reEncode < 0) >> { >> @@ -1592,7 +1622,7 @@ >> m_vbvResetTriggerRow = row; >> m_bAllRowsStop = true; >> >> - for (uint32_t r = m_numRows - 1; r >= row; r--) >> + for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r >> >= row; r--) >> { >> CTURow& stopRow = m_rows[r]; >> >> @@ -1670,14 +1700,15 @@ >> return; >> } >> } >> + >> /* this row of CTUs has been compressed */ >> if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv) >> { >> - if 
(row == m_numRows - 1) >> + if (bLastRowInSlice) >> { >> - for (int32_t r = 0; r < (int32_t)m_numRows; r++) >> + for (uint32_t r = m_sliceBaseRow[sliceId]; r < >> m_sliceBaseRow[sliceId + 1]; r++) >> { >> - for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs >> + 1; c < (int32_t)numCols * (r + 1); c++) >> + for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs >> + 1; c < numCols * (r + 1); c++) >> { >> curEncData.m_rowStat[r].rowSatd += >> curEncData.m_cuStat[c].vbvCost; >> curEncData.m_rowStat[r].rowIntraSatd += >> curEncData.m_cuStat[c].intraVbvCost; >> @@ -1695,26 +1726,41 @@ >> * after half the frame is encoded, but after this initial period we >> update >> * after refLagRows (the number of rows reference frames must have >> completed >> * before referencees may begin encoding) */ >> - uint32_t rowCount = 0; >> if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv) >> { >> + uint32_t rowCount = 0; >> + uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - >> m_sliceBaseRow[sliceId]; >> if (!m_rce.encodeOrder) >> - rowCount = m_numRows - 1; >> + rowCount = maxRows - 1; >> else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / >> m_param->fpsDenom)) >> - rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1); >> + rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1); >> else >> - rowCount = X265_MIN(m_refLagRows, m_numRows - 1); >> - if (row == rowCount) >> + rowCount = X265_MIN(m_refLagRows / >> m_param->maxSlices, maxRows - 1); >> + >> + if (rowInSlice == rowCount) >> { >> - m_rce.rowTotalBits = 0; >> + m_rowSliceTotalBits[sliceId] = 0; >> if (bIsVbv) >> - for (uint32_t i = 0; i < rowCount; i++) >> - m_rce.rowTotalBits += curEncData.m_rowStat[i].encode >> dBits; >> + { >> + for (uint32_t i = m_sliceBaseRow[sliceId]; i < rowCount >> + m_sliceBaseRow[sliceId]; i++) >> + m_rowSliceTotalBits[sliceId] += >> curEncData.m_rowStat[i].encodedBits; >> + } >> else >> - for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols; >> cuAddr++) >> - m_rce.rowTotalBits 
+= curEncData.m_cuStat[cuAddr].to >> talBits; >> + { >> + uint32_t startAddr = rowCount * numCols * sliceId; >> + uint32_t finishAddr = startAddr + rowCount * numCols; >> + >> + for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; >> cuAddr++) >> + m_rowSliceTotalBits[sliceId] += >> curEncData.m_cuStat[cuAddr].totalBits; >> + } >> >> - m_top->m_rateControl->rateControlUpdateStats(&m_rce); >> + if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices) >> + { >> + m_rce.rowTotalBits = 0; >> + for (uint32_t i = 0; i < m_param->maxSlices; i++) >> + m_rce.rowTotalBits += m_rowSliceTotalBits[i]; >> + m_top->m_rateControl->rateControlUpdateStats(&m_rce); >> + } >> } >> } >> >> @@ -1742,11 +1788,13 @@ >> if (rowInSlice >= m_filterRowDelay) >> { >> enableRowFilter(m_row_to_idx[row - m_filterRowDelay]); >> + >> /* NOTE: Activate filter if first row (row 0) */ >> if (rowInSlice == m_filterRowDelay) >> enqueueRowFilter(m_row_to_idx[row - m_filterRowDelay]); >> tryWakeOne(); >> } >> + >> if (bLastRowInSlice) >> { >> for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i < >> endRowInSlicePlus1; i++) >> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/frameencoder.h >> --- a/source/encoder/frameencoder.h Thu Jun 29 13:13:56 2017 +0530 >> +++ b/source/encoder/frameencoder.h Fri Sep 22 20:20:58 2017 +0530 >> @@ -138,6 +138,7 @@ >> volatile bool m_bAllRowsStop; >> volatile int m_completionCount; >> volatile int m_vbvResetTriggerRow; >> + volatile int m_sliceCnt; >> >> uint32_t m_numRows; >> uint32_t m_numCols; >> @@ -147,8 +148,10 @@ >> >> CTURow* m_rows; >> uint16_t m_sliceAddrBits; >> - uint16_t m_sliceGroupSize; >> - uint32_t* m_sliceBaseRow; >> + uint32_t m_sliceGroupSize; >> + uint32_t* m_sliceBaseRow; >> + uint32_t* m_sliceMaxBlockRow; >> + int64_t m_rowSliceTotalBits[2]; >> RateControlEntry m_rce; >> SEIDecodedPictureHash m_seiReconPictureDigest; >> >> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/ratecontrol.cpp >> --- a/source/encoder/ratecontrol.cpp Thu Jun 
29 13:13:56 2017 +0530 >> +++ b/source/encoder/ratecontrol.cpp Fri Sep 22 20:20:58 2017 +0530 >> @@ -732,7 +732,6 @@ >> m_bitrate = m_param->rc.bitrate * 1000; >> } >> >> - >> void RateControl::initHRD(SPS& sps) >> { >> int vbvBufferSize = m_param->rc.vbvBufferSize * 1000; >> @@ -765,6 +764,7 @@ >> >> #undef MAX_DURATION >> } >> + >> bool RateControl::analyseABR2Pass(uint64_t allAvailableBits) >> { >> double rateFactor, stepMult; >> @@ -1473,6 +1473,7 @@ >> >> return q; >> } >> + >> double RateControl::countExpectedBits(int startPos, int endPos) >> { >> double expectedBits = 0; >> @@ -1484,6 +1485,7 @@ >> } >> return expectedBits; >> } >> + >> bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int >> over, int endPos) >> { >> /* find an interval ending on an overflow or underflow (depending on >> whether >> @@ -1531,6 +1533,7 @@ >> } >> return adjusted; >> } >> + >> bool RateControl::cuTreeReadFor2Pass(Frame* frame) >> { >> int index = m_encOrder[frame->m_poc]; >> @@ -1579,24 +1582,24 @@ >> double RateControl::tuneAbrQScaleFromFeedback(double qScale) >> { >> double abrBuffer = 2 * m_rateTolerance * m_bitrate; >> - /* use framesDone instead of POC as poc count is not serial with >> bframes enabled */ >> - double overflow = 1.0; >> - double timeDone = (double)(m_framesDone - >> m_param->frameNumThreads + 1) * m_frameDuration; >> - double wantedBits = timeDone * m_bitrate; >> - int64_t encodedBits = m_totalBits; >> - if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) >> - { >> - abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); >> - encodedBits = m_encodedBits; >> - } >> + /* use framesDone instead of POC as poc count is not serial with >> bframes enabled */ >> + double overflow = 1.0; >> + double timeDone = (double)(m_framesDone - m_param->frameNumThreads + >> 1) * m_frameDuration; >> + double wantedBits = timeDone * m_bitrate; >> + int64_t encodedBits = m_totalBits; >> + if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps) 
>> + { >> + abrBuffer = m_param->totalFrames * (m_bitrate / m_fps); >> + encodedBits = m_encodedBits; >> + } >> >> - if (wantedBits > 0 && encodedBits > 0 && >> (!m_partialResidualFrames || >> - m_param->rc.bStrictCbr || m_isGrainEnabled)) >> - { >> - abrBuffer *= X265_MAX(1, sqrt(timeDone)); >> - overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - >> wantedBits) / abrBuffer); >> - qScale *= overflow; >> - } >> + if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames || >> + m_param->rc.bStrictCbr || m_isGrainEnabled)) >> + { >> + abrBuffer *= X265_MAX(1, sqrt(timeDone)); >> + overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) >> / abrBuffer); >> + qScale *= overflow; >> + } >> return qScale; >> } >> >> @@ -2330,17 +2333,18 @@ >> return totalSatdBits + encodedBitsSoFar; >> } >> >> -int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, >> RateControlEntry* rce, double& qpVbv) >> +int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, >> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t >> sliceId) >> { >> FrameData& curEncData = *curFrame->m_encData; >> double qScaleVbv = x265_qp2qScale(qpVbv); >> uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd; >> double encodedBits = curEncData.m_rowStat[row].encodedBits; >> + uint32_t rowInSlice = row - m_sliceBaseRow[sliceId]; >> >> - if (m_param->bEnableWavefront && row == 1) >> + if (m_param->bEnableWavefront && rowInSlice == 1) >> { >> - rowSatdCost += curEncData.m_rowStat[0].rowSatd; >> - encodedBits += curEncData.m_rowStat[0].encodedBits; >> + rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd; >> + encodedBits += curEncData.m_rowStat[row - 1].encodedBits; >> } >> rowSatdCost >>= X265_DEPTH - 8; >> updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost, >> encodedBits); >> @@ -2350,8 +2354,8 @@ >> if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp) >> { >> uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowI >> ntraSatd; >> - 
if (m_param->bEnableWavefront && row == 1) >> - intraRowSatdCost += curEncData.m_rowStat[0].rowInt >> raSatd; >> + if (m_param->bEnableWavefront && rowInSlice == 1) >> + intraRowSatdCost += curEncData.m_rowStat[row - >> 1].rowIntraSatd; >> intraRowSatdCost >>= X265_DEPTH - 8; >> updatePredictor(rce->rowPred[1], qScaleVbv, >> (double)intraRowSatdCost, encodedBits); >> } >> @@ -2376,7 +2380,7 @@ >> const SPS& sps = *curEncData.m_slice->m_sps; >> double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight); >> >> - if (row < sps.numCuInHeight - 1) >> + if (row < m_sliceBaseRow[sliceId + 1] - 1) >> { >> /* More threads means we have to be more cautious in letting >> ratecontrol use up extra bits. */ >> double rcTol = bufferLeftPlanned / m_param->frameNumThreads * >> m_rateTolerance; >> @@ -2693,8 +2697,8 @@ >> m_encodedBitsWindow[pos % s_slidingWindowFrames] = >> actualBits; >> if(rce->sliceType != I_SLICE) >> { >> - int qp = int (rce->qpaRc + 0.5); >> - m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? actualBits >> : (m_qpToEncodedBits[qp] + actualBits) * 0.5; >> + int qp = int (rce->qpaRc + 0.5); >> + m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? 
>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5; >> } >> curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow; >> curFrame->m_rcData->cplxrSum = m_cplxrSum; >> @@ -2779,7 +2783,8 @@ >> curFrame->m_encData->m_frameStats.percent8x8Skip * m_ncu) >> < 0) >> goto writeFailure; >> } >> - else{ >> + else >> + { >> RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps; >> int i, num = rpsWriter->numberOfPictures; >> char deltaPOC[128]; >> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/ratecontrol.h >> --- a/source/encoder/ratecontrol.h Thu Jun 29 13:13:56 2017 +0530 >> +++ b/source/encoder/ratecontrol.h Fri Sep 22 20:20:58 2017 +0530 >> @@ -244,7 +244,7 @@ >> int rateControlStart(Frame* curFrame, RateControlEntry* rce, >> Encoder* enc); >> void rateControlUpdateStats(RateControlEntry* rce); >> int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* >> rce, int *filler); >> - int rowVbvRateControl(Frame* curFrame, uint32_t row, >> RateControlEntry* rce, double& qpVbv); >> + int rowVbvRateControl(Frame* curFrame, uint32_t row, >> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t >> sliceId); >> int rateControlSliceType(int frameNum); >> bool cuTreeReadFor2Pass(Frame* curFrame); >> void hrdFullness(SEIBufferingPeriod* sei); >> _______________________________________________ >> x265-devel mailing list >> [email protected] >> https://mailman.videolan.org/listinfo/x265-devel >> > > > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
