Min, On Mon, Sep 16, 2013 at 5:04 PM, Min Chen <[email protected]> wrote:
> # HG changeset patch > # User Min Chen <[email protected]> > # Date 1379324742 -28800 > # Node ID 3cae8b54f7e7139ec0502d44fef8088280826a25 > # Parent 5a00b5a430d138ed4f77d008d36f037aa8536a6d > Use mixed bitmap between FrameEncoder and FrameFilter to Fix crash and > hash mistake in WPP mode > > Use mixed bitmap between FrameEncoder and FrameFilter to fix crash/hash errors with WPP enabled > I change task schedult bitmap to mixed FrameEncoder and FrameFilter > because there catch two bugs, and I want to reduce latency of Frame > Parallelism. > The new bitmap mapping 2N+0 to FrameEncoder and 2N+1 to FrameFilter. > > Change the task scheduling bitmap to alternate between FrameEncoder and FrameFilter. This avoids 2 bugs (described below), and reduces the latency with multiple frames encoded in parallel. The new bitmap maps the 2Nth thread to FrameEncoder and the 2N+1-th thread to FrameFilter. > Side effect: > 1. We can remove the lock from FrameFilter. > 2. Mixed bitmap let us do Filter early, so reduce latency of Frame > Parallelism > > Mixed bitmap lets us execute filter early, reducing latency of frame parallelism. > Solved bugs: > 1. CRASH: the reason is sometime two of threads finish in same time, > so they will enter Filter in wrong order and sent Finished Event > early. > when main thread dequeue JobProvider and execute FrameFilter, we > will catch a crash! > Two threads could finish simultanously, but execute filter in the wrong order and send a finished Event at the end of the frame early. > > 2. HASH MISTAKE: the reason is same as below, but last row is right > order, we will got worng reconst image. > > Out-of-order execution of filter, but the last row finishes in order, so a crash is avoided. But the recon image is obviously incorrect. > diff -r 5a00b5a430d1 -r 3cae8b54f7e7 source/encoder/frameencoder.cpp > --- a/source/encoder/frameencoder.cpp Mon Sep 16 17:41:02 2013 +0800 > +++ b/source/encoder/frameencoder.cpp Mon Sep 16 17:45:42 2013 +0800 > @@ -98,7 +98,8 @@ > m_rows[i].create(top); > } > > - if (!WaveFront::init(m_numRows)) > + // NOTE: 2 times of numRows because both Encoder and Filter in same > queue > + if (!WaveFront::init(m_numRows * 2)) > { > assert(!"Unable to initialize job queue."); > m_pool = NULL; > @@ -870,9 +871,9 @@ > } > } > > - WaveFront::enableRow(row); > + enableRowEncoder(row); > if (row == 0) > - WaveFront::enqueueRow(row); > + enqueueRowEncoder(0); > else > m_pool->pokeIdleThread(); > } > @@ -883,31 +884,43 @@ > } > else > { > - for (int i = 0; i < this->m_numRows; i++) > + for (int i = 0; i < this->m_numRows + m_filterRowDelay; i++) > { > - // block until all reference frames have reconstructed the > rows we need > - for (int l = 0; l < numPredDir; l++) > + // Encoder > + if (i < m_numRows) > { > - RefPicList list = (l ? REF_PIC_LIST_1 : REF_PIC_LIST_0); > - for (int ref = 0; ref < slice->getNumRefIdx(list); ref++) > + // block until all reference frames have reconstructed > the rows we need > + for (int l = 0; l < numPredDir; l++) > { > - TComPic *refpic = slice->getRefPic(list, ref); > - while ((refpic->m_reconRowCount != (UInt)m_numRows) > && (refpic->m_reconRowCount < i + refLagRows)) > + RefPicList list = (l ? REF_PIC_LIST_1 : > REF_PIC_LIST_0); > + for (int ref = 0; ref < slice->getNumRefIdx(list); > ref++) > { > - refpic->m_reconRowWait.wait(); > + TComPic *refpic = slice->getRefPic(list, ref); > + while ((refpic->m_reconRowCount != > (UInt)m_numRows) && (refpic->m_reconRowCount < i + refLagRows)) > + { > + refpic->m_reconRowWait.wait(); > + } > } > } > + > + processRow(i * 2 + 0); > } > > - processRow(i); > + // Filter > + if (i >= m_filterRowDelay) > + { > + processRow((i - m_filterRowDelay) * 2 + 1); > + } > } > } > } > > -void FrameEncoder::processRow(int row) > +void FrameEncoder::processRowEncoder(int row) > { > PPAScopeEvent(Thread_ProcessRow); > > + //printf("Encoder(%2d)\n", row); > + > // Called by worker threads > CTURow& curRow = m_rows[row]; > CTURow& codeRow = m_rows[m_cfg->param.bEnableWavefront ? row : 0]; > @@ -941,7 +954,7 @@ > m_rows[row + 1].m_completed + 2 <= > m_rows[row].m_completed) > { > m_rows[row + 1].m_active = true; > - WaveFront::enqueueRow(row + 1); > + enqueueRowEncoder(row + 1); > } > } > > @@ -957,13 +970,16 @@ > // Run row-wise loop filters > if (row >= m_filterRowDelay) > { > - m_frameFilter.processRow(row - m_filterRowDelay); > + enqueueRowFilter(row - m_filterRowDelay); > + > + // NOTE: Active Filter to first row (row 0) > + if (row == m_filterRowDelay) > + enableRowFilter(0); > } > if (row == m_numRows - 1) > { > for(int i = m_numRows - m_filterRowDelay; i < m_numRows; i++) > - m_frameFilter.processRow(i); > - m_completionEvent.trigger(); > + enqueueRowFilter(i); > } > } > > diff -r 5a00b5a430d1 -r 3cae8b54f7e7 source/encoder/frameencoder.h > --- a/source/encoder/frameencoder.h Mon Sep 16 17:41:02 2013 +0800 > +++ b/source/encoder/frameencoder.h Mon Sep 16 17:45:42 2013 +0800 > @@ -64,7 +64,54 @@ > > void destroy(); > > - void processRow(int row); > + void processRowEncoder(int row); > + > + void processRowFilter(int row) > + { > + m_frameFilter.processRow(row); > + } > + > + void enqueueRowEncoder(int row) > + { > + WaveFront::enqueueRow(row * 2 + 0); > + } > + > + void enqueueRowFilter(int row) > + { > + WaveFront::enqueueRow(row * 2 + 1); > + } > + > + void enableRowEncoder(int row) > + { > + WaveFront::enableRow(row * 2 + 0); > + } > + > + void enableRowFilter(int row) > + { > + WaveFront::enableRow(row * 2 + 1); > + } > + > + void processRow(int row) > + { > + const int realRow = row >> 1; > + const int typeNum = row & 1; > + > + // TODO: use switch when more type > + if (typeNum == 0) > + { > + processRowEncoder(realRow); > + } > + else > + { > + processRowFilter(realRow); > + > + // NOTE: Active next row > + if (realRow != m_numRows - 1) > + enableRowFilter(realRow + 1); > + else > + m_completionEvent.trigger(); > + } > + } > > TEncEntropy* getEntropyCoder(int row) { return > &this->m_rows[row].m_entropyCoder; } > > diff -r 5a00b5a430d1 -r 3cae8b54f7e7 source/encoder/framefilter.cpp > --- a/source/encoder/framefilter.cpp Mon Sep 16 17:41:02 2013 +0800 > +++ b/source/encoder/framefilter.cpp Mon Sep 16 17:45:42 2013 +0800 > @@ -102,8 +102,6 @@ > { > PPAScopeEvent(Thread_filterCU); > > - ScopedLock s(m_filterLock); // Only allow one row to be filtered at a > time > - > if (!m_cfg->param.bEnableLoopFilter) > { > processRowPost(row); > diff -r 5a00b5a430d1 -r 3cae8b54f7e7 source/encoder/framefilter.h > --- a/source/encoder/framefilter.h Mon Sep 16 17:41:02 2013 +0800 > +++ b/source/encoder/framefilter.h Mon Sep 16 17:45:42 2013 +0800 > @@ -70,8 +70,6 @@ > TEncBinCABACCounter m_rdGoOnBinCodersCABAC; > TComBitCounter m_bitCounter; > TEncSbac* m_rdGoOnSbacCoderRow0; // for bitstream > exact only, depends on HM's bug > - > - Lock m_filterLock; > }; > } > > > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
