On Sat, Sep 6, 2014 at 10:08 PM, Steve Borho <st...@borho.org> wrote:
> # HG changeset patch > # User Steve Borho <st...@borho.org> > # Date 1409932577 -7200 > # Fri Sep 05 17:56:17 2014 +0200 > # Node ID 07d69bce1760a28be1b1ee1821dfeb3335602422 > # Parent 795878af39730deb24e2ee0e585c625084bb031b > frameencoder: remove second encodeCU() pass over CTUs when SAO is disabled > > This is a performance optimization, it allows the encoder to generate the > final > bitstream of each CTU as it is compressed and cache hot. > > When SAO is enabled, SAO analysis must be performed and coded at the start > of > the CTU but SAO analysis currently requires surrounding CTUs to be encoded > making the second pass unavoidable. > > diff -r 795878af3973 -r 07d69bce1760 source/encoder/frameencoder.cpp > --- a/source/encoder/frameencoder.cpp Fri Sep 05 16:03:44 2014 +0200 > +++ b/source/encoder/frameencoder.cpp Fri Sep 05 17:56:17 2014 +0200 > @@ -192,16 +192,6 @@ > } > } > > - uint32_t numSubstreams = m_param->bEnableWavefront ? > m_frame->getPicSym()->getFrameHeightInCU() : 1; > - if (!m_outStreams) > - { > - m_outStreams = new Bitstream[numSubstreams]; > - m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams); > - } > - else > - for (uint32_t i = 0; i < numSubstreams; i++) > - m_outStreams[i].resetBits(); > - > /* Get the QP for this frame from rate control. This call may block > until > * frames ahead of it in encode order have called rateControlEnd() */ > int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce, > m_top); > @@ -214,6 +204,24 @@ > > m_frameFilter.start(m_frame, m_initSliceContext, qp); > > + // reset entropy coders > + m_entropyCoder.load(m_initSliceContext); > + for (int i = 0; i < m_numRows; i++) > + m_rows[i].init(m_initSliceContext); > + > + uint32_t numSubstreams = m_param->bEnableWavefront ? 
> m_frame->getPicSym()->getFrameHeightInCU() : 1; > + if (!m_outStreams) > + { > + m_outStreams = new Bitstream[numSubstreams]; > + m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams); > + if (!m_param->bEnableSAO) > + for (uint32_t i = 0; i < numSubstreams; i++) > + > m_rows[i].rdEntropyCoders[0][CI_CURR_BEST].setBitstream(&m_outStreams[i]); > + } > + else > + for (uint32_t i = 0; i < numSubstreams; i++) > + m_outStreams[i].resetBits(); > + > if (m_frame->m_lowres.bKeyframe) > { > if (m_param->bEmitHRDSEI) > @@ -328,7 +336,7 @@ > m_entropyCoder.setBitstream(&m_bs); > m_entropyCoder.codeSliceHeader(slice); > > - // re-encode each row of CUs for the final time (TODO: get rid of > this second pass) > + // finish encode of each CTU row > encodeSlice(); > > // serialize each row, record final lengths in slice header > @@ -409,8 +417,40 @@ > const uint32_t widthInLCUs = > m_frame->getPicSym()->getFrameWidthInCU(); > const uint32_t lastCUAddr = (slice->m_endCUAddr + > m_frame->getNumPartInCU() - 1) / m_frame->getNumPartInCU(); > const int numSubstreams = m_param->bEnableWavefront ? 
> m_frame->getPicSym()->getFrameHeightInCU() : 1; > + > + if (!m_param->bEnableSAO) > + { > + /* terminate each row and collect stats */ > + for (uint32_t cuAddr = 0; cuAddr < lastCUAddr; cuAddr++) > + { > + uint32_t col = cuAddr % widthInLCUs; > + > + if (m_param->bEnableWavefront && col == widthInLCUs - 1) > + { > + uint32_t lin = cuAddr / widthInLCUs; > + uint32_t subStrm = lin % numSubstreams; > + > m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1); > + > m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish(); > + m_outStreams[subStrm].writeByteAlignment(); > + } > + > + // Collect Frame Stats for 2 pass > + TComDataCU* cu = m_frame->getCU(cuAddr); > + m_frameStats.mvBits += cu->m_mvBits; > + m_frameStats.coeffBits += cu->m_coeffBits; > + m_frameStats.miscBits += cu->m_totalBits - (cu->m_mvBits + > cu->m_coeffBits); > + } > + if (!m_param->bEnableWavefront) > + { > + > m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1); > + m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish(); > + m_outStreams[0].writeByteAlignment(); > + } > + > + return; > + } > + > SAOParam *saoParam = slice->m_pic->getPicSym()->m_saoParam; > - > for (uint32_t cuAddr = 0; cuAddr < lastCUAddr; cuAddr++) > { > uint32_t col = cuAddr % widthInLCUs; > @@ -487,11 +527,6 @@ > PPAScopeEvent(FrameEncoder_compressRows); > Slice* slice = m_frame->m_picSym->m_slice; > > - // reset entropy coders > - m_entropyCoder.load(m_initSliceContext); > - for (int i = 0; i < m_numRows; i++) > - m_rows[i].init(m_initSliceContext); > - > m_bAllRowsStop = false; > m_vbvResetTriggerRow = -1; > > @@ -672,15 +707,17 @@ > } > > if (m_param->bEnableWavefront && col == 0 && row > 0) > + { > // Load SBAC coder context from previous row. 
> + > curRow.rdEntropyCoders[0][CI_CURR_BEST].copyState(m_initSliceContext); > > curRow.rdEntropyCoders[0][CI_CURR_BEST].loadContexts(m_rows[row - > 1].bufferEntropyCoder); > It's the same thing in encodeSlice as well, but why are we copying the state from m_initSliceContext and the contexts from the saved previous-row coder? Shouldn't both the state and the contexts be copied from the previous row's coder? > + } > > tld.cuCoder.m_quant.setQPforQuant(cu); > tld.cuCoder.compressCU(cu); // Does all the CU analysis > > /* advance top-level CI_CURR_BEST to include the context of this > CTU. > - * Note that if SAO was disabled this could directly write to a > - * bitstream object and we could skip most of encodeSlice() */ > + * if SAO is disabled, this writes final CTU bitstream */ > curRow.rdEntropyCoders[0][CI_CURR_BEST].encodeCU(cu); > > if (m_param->bEnableWavefront && col == 1) > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel