OK, so this patch removes the code introduced by the previous one; I will fold the two patches together so that the tests do not break in between.
On Tue, Jun 9, 2015 at 11:36 PM, Min Chen <[email protected]> wrote: > # HG changeset patch > # User Min Chen <[email protected]> > # Date 1433872875 25200 > # Node ID 134670771e0c1dd0800c3e9db0a1f9f69c467e36 > # Parent 04c9567aa2bb7b549cd6a3514a1ef29d64724638 > split fast RD path in codeCoeffNxN() > --- > source/encoder/entropy.cpp | 131 > +++++++++++++++++++++++++++++--------------- > 1 files changed, 86 insertions(+), 45 deletions(-) > > diff -r 04c9567aa2bb -r 134670771e0c source/encoder/entropy.cpp > --- a/source/encoder/entropy.cpp Tue Jun 09 11:01:13 2015 -0700 > +++ b/source/encoder/entropy.cpp Tue Jun 09 11:01:15 2015 -0700 > @@ -1690,66 +1690,106 @@ > CTZ(firstNZPosInCG, subCoeffFlag); > > bool signHidden = (lastNZPosInCG - firstNZPosInCG >= > SBH_THRESHOLD); > - uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0; > + uint32_t ctxSet = ((subSet > 0) & bIsLuma) ? 2 : 0; > > - if (c1 == 0) > - ctxSet++; > + ctxSet += (c1 == 0); > > c1 = 1; > - uint8_t *baseCtxMod = bIsLuma ? > &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] : > &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet]; > + uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 
0 : > NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet]; > > uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER); > - int firstC2Flag = -1; > + X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n"); > > - X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n"); > - idx = 0; > - do > + uint32_t firstC2Flag = 2; > + uint32_t c1Next = 0xFFFFFFFE; > + if (!m_bitIf) > { > - uint32_t symbol1 = absCoeff[idx] > 1; > - uint32_t symbol2 = absCoeff[idx] > 2; > - encodeBin(symbol1, baseCtxMod[c1]); > + uint32_t sum = 0; > + // Fast RD path > + idx = 0; > + do > + { > + uint32_t symbol1 = absCoeff[idx] > 1; > + uint32_t symbol2 = absCoeff[idx] > 2; > + //encodeBin(symbol1, baseCtxMod[c1]); > + { > + const uint32_t mstate = baseCtxMod[c1]; > + baseCtxMod[c1] = sbacNext(mstate, symbol1); > + sum += sbacGetEntropyBits(mstate, symbol1); > + } > > - // TODO: VC can't work fine on below style, but ICL can > generate branch free code > -#ifdef __INTEL_COMPILER > - if (symbol) > - c1 = 0; > + if (symbol1) > + c1Next = 0; > + if (symbol1 + firstC2Flag == 3) > + firstC2Flag = symbol2; > > - if ((firstC2Flag < 0) & symbol) > - firstC2Flag = (int)symbol2; > -#else > - if (symbol1) > + c1 = (c1Next & 3); > + c1Next >>= 2; > + X265_CHECK(c1 <= 3, "c1 check failure\n"); > + idx++; > + } > + while(idx < numC1Flag); > + > + if (!c1) > { > - c1 = 0; > - if (firstC2Flag < 0) > - firstC2Flag = (int)symbol2; > + X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check > failure\n"); > + > + baseCtxMod = &m_contextState[(bIsLuma ? 
0 : > NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet]; > + > + //encodeBin(firstC2Flag, baseCtxMod[0]); > + { > + const uint32_t mstate = baseCtxMod[0]; > + baseCtxMod[0] = sbacNext(mstate, firstC2Flag); > + sum += sbacGetEntropyBits(mstate, firstC2Flag); > + } > } > -#endif > - c1 += ((c1 >> 1) ^ c1) & 1; > - X265_CHECK(c1 <= 3, "c1 check failure\n"); > - idx++; > - } > - while(idx < numC1Flag); > + m_fracBits += (sum & 0xFFFFFF); > > - if (!c1) > - { > - baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX + > ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA + > ctxSet]; > + const int hiddenShift = (bHideFirstSign & signHidden) ? 1 > : 0; > + //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - > hiddenShift); > + m_fracBits += (numNonZero - hiddenShift) << 15; > > - X265_CHECK((firstC2Flag != -1), "firstC2FlagIdx check > failure\n"); > - encodeBin(firstC2Flag, baseCtxMod[0]); > - } > - > - const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : > 0; > - encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - > hiddenShift); > - > - if (!c1 || numNonZero > C1FLAG_NUMBER) > - { > - if (!m_bitIf) > + if (!c1 || numNonZero > C1FLAG_NUMBER) > { > - // Fast RD path > uint32_t sum = primitives.costCoeffRemain(absCoeff, > numNonZero); > m_fracBits += ((uint64_t)sum << 15); > } > - else > + } > + // Standard path > + else > + { > + idx = 0; > + do > + { > + uint32_t symbol1 = absCoeff[idx] > 1; > + uint32_t symbol2 = absCoeff[idx] > 2; > + encodeBin(symbol1, baseCtxMod[c1]); > + > + if (symbol1) > + c1Next = 0; > + > + if (symbol1 + firstC2Flag == 3) > + firstC2Flag = symbol2; > + > + c1 = (c1Next & 3); > + c1Next >>= 2; > + X265_CHECK(c1 <= 3, "c1 check failure\n"); > + idx++; > + } > + while(idx < numC1Flag); > + > + if (!c1) > + { > + baseCtxMod = &m_contextState[(bIsLuma ? 
0 : > NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet]; > + > + X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check > failure\n"); > + encodeBin(firstC2Flag, baseCtxMod[0]); > + } > + > + const int hiddenShift = (bHideFirstSign && signHidden) ? > 1 : 0; > + encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - > hiddenShift); > + > + if (!c1 || numNonZero > C1FLAG_NUMBER) > { > // Standard path > uint32_t goRiceParam = 0; > @@ -1776,8 +1816,9 @@ > } > while(idx < numNonZero); > } > - } > - } > + } // end of !bitIf > + } // end of (numNonZero > 0) > + > // Initialize value for next loop > numNonZero = 0; > scanPosSigOff = (1 << MLS_CG_SIZE) - 1; > > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
