On 08/14, dave wrote: > On 08/14/2014 01:42 PM, Steve Borho wrote: > ># HG changeset patch > ># User Steve Borho <st...@borho.org> > ># Date 1408048681 18000 > ># Thu Aug 14 15:38:01 2014 -0500 > ># Node ID 07138e6ac952c96d1e31f5490c44f4cfaf6ac12a > ># Parent 213f17c1492c5bf96c3f382e7beffe0c871a563c > >analysis: use macro and for-loop to simplify fast-intra > > > >this changes behavior a bit; it's trying both +/-1 offsets instead of just > >one. and it has to do one extra check at the end since mode 34 isn't reached > >by the other previous loops > > > >diff -r 213f17c1492c -r 07138e6ac952 source/encoder/analysis.cpp > >--- a/source/encoder/analysis.cpp Thu Aug 14 09:43:39 2014 -0700 > >+++ b/source/encoder/analysis.cpp Thu Aug 14 15:38:01 2014 -0500 > >@@ -1693,68 +1693,56 @@ > > bool modeHor; > > pixel *cmp; > > intptr_t srcStride; > >+ > >+#define TRY_ANGLE(angle) \ > >+ modeHor = angle < 18; \ > >+ cmp = modeHor ? buf_trans : fenc; \ > >+ srcStride = modeHor ? scaleTuSize : scaleStride; \ > >+ sad = sa8d(cmp, srcStride, &tmp[(angle - 2) * predsize], scaleTuSize) > ><< costShift; \ > >+ bits = (mpms & ((uint64_t)1 << angle)) ? xModeBitsIntra(cu, angle, > >partOffset, depth) : rbits; \ > >+ cost = m_rdCost.calcRdSADCost(sad, bits) > >+ > > if (m_param->bEnableFastIntra) > > { > >- int lowsad, highsad, asad = 0; > >- uint32_t lowbits, highbits, amode, lowmode, highmode, abits = 0; > >- uint64_t lowcost, highcost = MAX_INT64, acost = MAX_INT64; > >+ int asad = 0; > >+ uint32_t lowmode, highmode, amode, abits = 0; > >+ uint64_t acost = MAX_INT64; > >- for (mode = 4;mode < 35; mode += 5) > >+ /* pick the best angle, sampling at distance of 5 */ > >+ for (mode = 5; mode < 35; mode += 5)
Thanks for reviewing. > By starting with mode = 5, won't this miss mode 2 since only +/-2 is > checked? By starting from 4 the loop should end at 34. If 5 was the best angle of the initial sweep, we'll try +/-2 (3 and 7). If 3 is the new best, we try +/-1, which would be 2 and 4. On the high end of the spectrum, if 30 was the best cost, it will try 28 and 32, then (if 32 is the new best) 31 and 33; if 33 ends up best, the extra check at the end tries 34. Starting with 4 would remove the need for the extra check at the end, but at the same time we would need to range-check the low/high modes as well, since it could reach mode 1 (DC) or modes above 34. > >- modeHor = (mode < 18); > >- cmp = (modeHor ? buf_trans : fenc); > >- srcStride = (modeHor ? scaleTuSize : scaleStride); > >- sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], > >scaleTuSize) << costShift; > >- bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : > >xModeBitsIntra(cu, mode, partOffset, depth); > >- cost = m_rdCost.calcRdSADCost(sad, bits); > >+ TRY_ANGLE(mode); > > COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits); > > } > >- lowmode = amode - 2; > >- modeHor = (lowmode < 18); > >- cmp = (modeHor ? buf_trans : fenc); > >- srcStride = (modeHor ? scaleTuSize : scaleStride); > >- lowsad = sa8d(cmp, srcStride, &tmp[(lowmode - 2) * predsize], > >scaleTuSize) << costShift; > >- lowbits = !(mpms & ((uint64_t)1 << lowmode)) ? rbits : > >xModeBitsIntra(cu, lowmode, partOffset, depth); > >- lowcost = m_rdCost.calcRdSADCost(lowsad, lowbits); > >- if (amode < 34) > >+ > >+ /* refine best angle at distance 2, then distance 1 */ > >+ for (uint32_t dist = 2; dist >= 1; dist--) > > { > >- highmode = amode + 2; > >- modeHor = (highmode < 18); > >- cmp = (modeHor ? buf_trans : fenc); > >- srcStride = (modeHor ? scaleTuSize : scaleStride); > >- highsad = sa8d(cmp, srcStride, &tmp[(highmode - 2) * predsize], > >scaleTuSize) << costShift; > >- highbits = !(mpms & ((uint64_t)1 << highmode)) ? 
rbits : > >xModeBitsIntra(cu, highmode, partOffset, depth); > >- highcost = m_rdCost.calcRdSADCost(highsad, highbits); > >+ lowmode = amode - dist; > >+ highmode = amode + dist; > >+ > >+ X265_CHECK(lowmode >= 2 && lowmode <= 34, "low intra mode out > >of range\n"); > >+ TRY_ANGLE(lowmode); > >+ COPY4_IF_LT(acost, cost, amode, lowmode, asad, sad, abits, > >bits); > >+ > >+ X265_CHECK(highmode >= 2 && highmode <= 34, "high intra mode > >out of range\n"); > >+ TRY_ANGLE(highmode); > >+ COPY4_IF_LT(acost, cost, amode, highmode, asad, sad, abits, > >bits); > > } > >- if (lowcost <= highcost) > >+ > >+ if (amode == 33) > > { > >- mode = amode - 1; > >- COPY4_IF_LT(acost, lowcost, amode, lowmode, asad, lowsad, > >abits, lowbits); > >+ TRY_ANGLE(34); > >+ COPY4_IF_LT(acost, cost, amode, 34, asad, sad, abits, bits); > > } > >- else > >- { > >- mode = amode + 1; > >- COPY4_IF_LT(acost, highcost, amode, highmode, asad, highsad, > >abits, highbits); > >- } > >- modeHor = (mode < 18); > >- cmp = (modeHor ? buf_trans : fenc); > >- srcStride = (modeHor ? scaleTuSize : scaleStride); > >- sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], > >scaleTuSize) << costShift; > >- bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, > >mode, partOffset, depth); > >- cost = m_rdCost.calcRdSADCost(sad, bits); > >- COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits); > >+ > > COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits); > > } > > else // calculate and search all intra prediction angles for lowest > > cost > > { > > for (mode = 2; mode < 35; mode++) > > { > >- modeHor = (mode < 18); > >- cmp = (modeHor ? buf_trans : fenc); > >- srcStride = (modeHor ? scaleTuSize : scaleStride); > >- sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], > >scaleTuSize) << costShift; > >- bits = !(mpms & ((uint64_t)1 << mode)) ? 
rbits : > >xModeBitsIntra(cu, mode, partOffset, depth); > >- cost = m_rdCost.calcRdSADCost(sad, bits); > >+ TRY_ANGLE(mode); > > COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits); > > } > > } > >_______________________________________________ > >x265-devel mailing list > >x265-devel@videolan.org > >https://mailman.videolan.org/listinfo/x265-devel > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel