[x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate
# HG changeset patch # User Min Chen chenm...@163.com # Date 1397561438 -28800 # Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a # Parent 1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb cleanup: reduce data size and dependency on MotionEstimate diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800 @@ -111,8 +111,6 @@ m_rdCost = rdCost; initTempBuff(cfg->param->internalCsp); -m_me.setSearchMethod(cfg->param->searchMethod); -m_me.setSubpelRefine(cfg->param->subpelRefine); /* When frame parallelism is active, only 'refLagPixels' of reference frames will be guaranteed * available for motion reference. See refLagRows in FrameEncoder::compressCTURows() */ @@ -2191,7 +2189,7 @@ cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx; motionCompensation(cu, m_predTempYuv, REF_PIC_LIST_X, puIdx, true, false); -uint32_t costCand = m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride()); +uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride()); uint32_t bitsCand = mergeCand + 1; if (mergeCand == m_cfg->param->maxNumMergeCand - 1) { @@ -2314,7 +2312,7 @@ cu->clipMv(mvCand); xPredInterLumaBlk(cu, cu->getSlice()->getRefPic(l, ref)->getPicYuvRec(), partAddr, mvCand, roiWidth, roiHeight, m_predTempYuv); -uint32_t cost = m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); +uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); cost = m_rdCost->calcRdSADCost(cost, MVP_IDX_BITS); if (bestCost > cost) @@ -2328,11 +2326,11 @@ int merange = m_cfg->param->searchRange; xSetSearchRange(cu, mvp, merange, mvmin, mvmax); -int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv); +int 
satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg->param->searchMethod, m_cfg->param->subpelRefine); /* Get total cost of partition, but only include MV bit cost once */ -bits += m_me.bitcost(outmv); -uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost->getCost(bits); +bits += m_me.bitcost(outmv, mvp); +uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + m_rdCost->getCost(bits); /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */ xCheckBestMVP(amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, cost); @@ -2368,7 +2366,7 @@ int partEnum = partitionFromSizes(roiWidth, roiHeight); primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32); -int satdCost = m_me.bufSATD(avg, roiWidth); +int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); bidirBits = list[0].bits + list[1].bits + listSelBits[2] - (listSelBits[0] + listSelBits[1]); bidirCost = satdCost + m_rdCost->getCost(bidirBits); @@ -2397,17 +2395,15 @@ intptr_t refStride = m_mref[0][0]->lumaStride; primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, refStride, ref1, refStride, 32); -satdCost = m_me.bufSATD(avg, roiWidth); +satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); MV mvp0 = list[0].mvp; int mvpIdx0 = list[0].mvpIdx; -m_me.setMVP(mvp0); -uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + m_me.bitcost(mvzero); +uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0); MV mvp1 = list[1].mvp; int mvpIdx1 = list[1].mvpIdx; -m_me.setMVP(mvp1); -uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + m_me.bitcost(mvzero); +uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1); uint32_t cost = satdCost + m_rdCost->getCost(bits0) + m_rdCost->getCost(bits1); @@ -2556,9 +2552,8 @@ { assert(amvpInfo->m_mvCand[outMvpIdx] == mvPred); -m_me.setMVP(mvPred); int bestMvpIdx = outMvpIdx; -int mvBitsOrig = 
m_me.bitcost(mv) + MVP_IDX_BITS; +int mvBitsOrig = m_me.bitcost(mv, mvPred) + MVP_IDX_BITS; int bestMvBits = mvBitsOrig; for (int mvpIdx = 0; mvpIdx < AMVP_MAX_NUM_CANDS; mvpIdx++) @@
Re: [x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate
On Tue, Apr 15, 2014 at 6:30 AM, Min Chen chenm...@163.com wrote: # HG changeset patch # User Min Chen chenm...@163.com # Date 1397561438 -28800 # Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a # Parent 1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb cleanup: reduce data size and dependency on MotionEstimate diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800 @@ -111,8 +111,6 @@ m_rdCost = rdCost; initTempBuff(cfg->param->internalCsp); -m_me.setSearchMethod(cfg->param->searchMethod); -m_me.setSubpelRefine(cfg->param->subpelRefine); /* When frame parallelism is active, only 'refLagPixels' of reference frames will be guaranteed * available for motion reference. See refLagRows in FrameEncoder::compressCTURows() */ @@ -2191,7 +2189,7 @@ cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx; motionCompensation(cu, m_predTempYuv, REF_PIC_LIST_X, puIdx, true, false); -uint32_t costCand = m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride()); +uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride()); uint32_t bitsCand = mergeCand + 1; if (mergeCand == m_cfg->param->maxNumMergeCand - 1) { @@ -2314,7 +2312,7 @@ cu->clipMv(mvCand); xPredInterLumaBlk(cu, cu->getSlice()->getRefPic(l, ref)->getPicYuvRec(), partAddr, mvCand, roiWidth, roiHeight, m_predTempYuv); -uint32_t cost = m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); +uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); cost = m_rdCost->calcRdSADCost(cost, MVP_IDX_BITS); if (bestCost > cost) @@ -2328,11 +2326,11 @@ int merange = m_cfg->param->searchRange; xSetSearchRange(cu, mvp, merange, mvmin, mvmax); -int satdCost = 
m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv); +int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg->param->searchMethod, m_cfg->param->subpelRefine); /* Get total cost of partition, but only include MV bit cost once */ -bits += m_me.bitcost(outmv); -uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost->getCost(bits); +bits += m_me.bitcost(outmv, mvp); +uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + m_rdCost->getCost(bits); /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */ xCheckBestMVP(amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, cost); @@ -2368,7 +2366,7 @@ int partEnum = partitionFromSizes(roiWidth, roiHeight); primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32); -int satdCost = m_me.bufSATD(avg, roiWidth); +int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); bidirBits = list[0].bits + list[1].bits + listSelBits[2] - (listSelBits[0] + listSelBits[1]); bidirCost = satdCost + m_rdCost->getCost(bidirBits); @@ -2397,17 +2395,15 @@ intptr_t refStride = m_mref[0][0]->lumaStride; primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, refStride, ref1, refStride, 32); -satdCost = m_me.bufSATD(avg, roiWidth); +satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); MV mvp0 = list[0].mvp; int mvpIdx0 = list[0].mvpIdx; -m_me.setMVP(mvp0); -uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + m_me.bitcost(mvzero); +uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0); MV mvp1 = list[1].mvp; int mvpIdx1 = list[1].mvpIdx; -m_me.setMVP(mvp1); -uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + m_me.bitcost(mvzero); +uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1); uint32_t cost = satdCost + m_rdCost->getCost(bits0) + m_rdCost->getCost(bits1); @@ -2556,9 +2552,8 @@ { 
assert(amvpInfo->m_mvCand[outMvpIdx] == mvPred); -m_me.setMVP(mvPred); int bestMvpIdx = outMvpIdx; -int mvBitsOrig = m_me.bitcost(mv) + MVP_IDX_BITS; +int mvBitsOrig =