On Tue, Apr 15, 2014 at 6:30 AM, Min Chen <[email protected]> wrote: > # HG changeset patch > # User Min Chen <[email protected]> > # Date 1397561438 -28800 > # Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a > # Parent 1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb > cleanup: reduce data size and dependency on MotionEstimate > > diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp > --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500 > +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800 > @@ -111,8 +111,6 @@ > m_rdCost = rdCost; > > initTempBuff(cfg->param->internalCsp); > - m_me.setSearchMethod(cfg->param->searchMethod); > - m_me.setSubpelRefine(cfg->param->subpelRefine); > > /* When frame parallelism is active, only 'refLagPixels' of reference > frames will be guaranteed > * available for motion reference. See refLagRows in > FrameEncoder::compressCTURows() */ > @@ -2191,7 +2189,7 @@ > cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = > m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx; > > motionCompensation(cu, &m_predTempYuv, REF_PIC_LIST_X, puIdx, true, > false); > - uint32_t costCand = > m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), > m_predTempYuv.getStride()); > + uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, > m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride()); > uint32_t bitsCand = mergeCand + 1; > if (mergeCand == m_cfg->param->maxNumMergeCand - 1) > { > @@ -2314,7 +2312,7 @@ > cu->clipMv(mvCand); > > xPredInterLumaBlk(cu, cu->getSlice()->getRefPic(l, > ref)->getPicYuvRec(), partAddr, &mvCand, roiWidth, roiHeight, &m_predTempYuv); > - uint32_t cost = > m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); > + uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, > m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); > cost = m_rdCost->calcRdSADCost(cost, MVP_IDX_BITS); > > if (bestCost > cost) > @@ -2328,11 +2326,11 @@ > > 
int merange = m_cfg->param->searchRange; > xSetSearchRange(cu, mvp, merange, mvmin, mvmax); > - int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, > mvmax, mvp, numMvc, mvc, merange, outmv); > + int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, > mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg->param->searchMethod, > m_cfg->param->subpelRefine); > > /* Get total cost of partition, but only include MV bit cost > once */ > - bits += m_me.bitcost(outmv); > - uint32_t cost = (satdCost - m_me.mvcost(outmv)) + > m_rdCost->getCost(bits); > + bits += m_me.bitcost(outmv, mvp); > + uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + > m_rdCost->getCost(bits); > > /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */ > xCheckBestMVP(&amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, > cost); > @@ -2368,7 +2366,7 @@ > > int partEnum = partitionFromSizes(roiWidth, roiHeight); > primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, > m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32); > - int satdCost = m_me.bufSATD(avg, roiWidth); > + int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); > > bidirBits = list[0].bits + list[1].bits + listSelBits[2] - > (listSelBits[0] + listSelBits[1]); > bidirCost = satdCost + m_rdCost->getCost(bidirBits); > @@ -2397,17 +2395,15 @@ > intptr_t refStride = m_mref[0][0]->lumaStride; > > primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, > refStride, ref1, refStride, 32); > - satdCost = m_me.bufSATD(avg, roiWidth); > + satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); > > MV mvp0 = list[0].mvp; > int mvpIdx0 = list[0].mvpIdx; > - m_me.setMVP(mvp0); > - uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + > m_me.bitcost(mvzero); > + uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, > mvp0) + m_me.bitcost(mvzero, mvp0); > > MV mvp1 = list[1].mvp; > int mvpIdx1 = list[1].mvpIdx; > - m_me.setMVP(mvp1); > - uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + > 
m_me.bitcost(mvzero); > + uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, > mvp1) + m_me.bitcost(mvzero, mvp1); > > uint32_t cost = satdCost + m_rdCost->getCost(bits0) + > m_rdCost->getCost(bits1); > > @@ -2556,9 +2552,8 @@ > { > assert(amvpInfo->m_mvCand[outMvpIdx] == mvPred); > > - m_me.setMVP(mvPred); > int bestMvpIdx = outMvpIdx; > - int mvBitsOrig = m_me.bitcost(mv) + MVP_IDX_BITS; > + int mvBitsOrig = m_me.bitcost(mv, mvPred) + MVP_IDX_BITS; > int bestMvBits = mvBitsOrig; > > for (int mvpIdx = 0; mvpIdx < AMVP_MAX_NUM_CANDS; mvpIdx++) > @@ -2566,8 +2561,7 @@ > if (mvpIdx == outMvpIdx) > continue; > > - m_me.setMVP(amvpInfo->m_mvCand[mvpIdx]); > - int mvbits = m_me.bitcost(mv) + MVP_IDX_BITS; > + int mvbits = m_me.bitcost(mv, amvpInfo->m_mvCand[mvpIdx]) + > MVP_IDX_BITS; > > if (mvbits < bestMvBits) > { > diff -r 1cf67a7b362d -r dd78d554f78d source/encoder/bitcost.h > --- a/source/encoder/bitcost.h Mon Apr 14 21:26:37 2014 -0500 > +++ b/source/encoder/bitcost.h Tue Apr 15 19:30:38 2014 +0800 > @@ -35,36 +35,26 @@ > { > public: > > - BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0) {} > + BitCost() : m_cost(NULL) {} > > void setQP(unsigned int qp); > > - void setMVP(const MV& mvp) { m_mvp = mvp; > m_cost_mvx = m_cost - mvp.x; m_cost_mvy = m_cost - mvp.y; } > - > // return bit cost of motion vector difference, multiplied by lambda > - inline uint16_t mvcost(const MV& mv) const { return > m_cost_mvx[mv.x] + m_cost_mvy[mv.y]; } > + inline uint16_t mvcost(const MV mv, const MV mvp) const { return > m_cost[mv.x - mvp.x] + m_cost[mv.y - mvp.y]; } > > // return bit cost of motion vector difference, without lambda > - inline uint16_t bitcost(const MV& mv) const > + inline uint16_t bitcost(const MV mv, const MV mvp) const > { > - return (uint16_t)(s_bitsizes[(abs(mv.x - m_mvp.x) << 1) + !!(mv.x < > m_mvp.x)] + > - s_bitsizes[(abs(mv.y - m_mvp.y) << 1) + !!(mv.y < > m_mvp.y)] + 0.5f); > + return (uint16_t)(s_bitsizes[(abs(mv.x - mvp.x) << 1) + !!(mv.x < > 
mvp.x)] + > + s_bitsizes[(abs(mv.y - mvp.y) << 1) + !!(mv.y < > mvp.y)] + 0.5f); > } > > static void destroy(); > > protected: > > - uint16_t *m_cost_mvx; > - > - uint16_t *m_cost_mvy; > -
This seems like an odd trade-off to me: it saves 16 bytes (the two cached pointers) at the cost of two additional subtract operations in every MV cost evaluation. Does this change actually measure as a performance improvement? -- Steve Borho _______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
