[x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate

2014-04-15 Thread Min Chen
# HG changeset patch
# User Min Chen chenm...@163.com
# Date 1397561438 -28800
# Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a
# Parent  1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb
cleanup: reduce data size and dependency on MotionEstimate

diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800
@@ -111,8 +111,6 @@
 m_rdCost  = rdCost;
 
 initTempBuff(cfg->param->internalCsp);
-m_me.setSearchMethod(cfg->param->searchMethod);
-m_me.setSubpelRefine(cfg->param->subpelRefine);
 
 /* When frame parallelism is active, only 'refLagPixels' of reference 
frames will be guaranteed
  * available for motion reference.  See refLagRows in 
FrameEncoder::compressCTURows() */
@@ -2191,7 +2189,7 @@
 cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = 
m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx;
 
 motionCompensation(cu, m_predTempYuv, REF_PIC_LIST_X, puIdx, true, 
false);
-uint32_t costCand = 
m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), 
m_predTempYuv.getStride());
+uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, 
m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());
 uint32_t bitsCand = mergeCand + 1;
 if (mergeCand == m_cfg->param->maxNumMergeCand - 1)
 {
@@ -2314,7 +2312,7 @@
 cu->clipMv(mvCand);
 
 xPredInterLumaBlk(cu, cu->getSlice()->getRefPic(l, 
ref)->getPicYuvRec(), partAddr, mvCand, roiWidth, roiHeight, m_predTempYuv);
-uint32_t cost = 
m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
+uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, 
m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
 cost = m_rdCost->calcRdSADCost(cost, MVP_IDX_BITS);
 
 if (bestCost > cost)
@@ -2328,11 +2326,11 @@
 
 int merange = m_cfg->param->searchRange;
 xSetSearchRange(cu, mvp, merange, mvmin, mvmax);
-int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, 
mvmax, mvp, numMvc, mvc, merange, outmv);
+int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, 
mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg->param->searchMethod, 
m_cfg->param->subpelRefine);
 
 /* Get total cost of partition, but only include MV bit cost 
once */
-bits += m_me.bitcost(outmv);
-uint32_t cost = (satdCost - m_me.mvcost(outmv)) + 
m_rdCost->getCost(bits);
+bits += m_me.bitcost(outmv, mvp);
+uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + 
m_rdCost->getCost(bits);
 
 /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
 xCheckBestMVP(amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, 
cost);
@@ -2368,7 +2366,7 @@
 
 int partEnum = partitionFromSizes(roiWidth, roiHeight);
 primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, 
m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32);
-int satdCost = m_me.bufSATD(avg, roiWidth);
+int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);
 
 bidirBits = list[0].bits + list[1].bits + listSelBits[2] - 
(listSelBits[0] + listSelBits[1]);
 bidirCost = satdCost + m_rdCost->getCost(bidirBits);
@@ -2397,17 +2395,15 @@
 intptr_t refStride = m_mref[0][0]->lumaStride;
 
 primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, 
refStride, ref1, refStride, 32);
-satdCost = m_me.bufSATD(avg, roiWidth);
+satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);
 
 MV mvp0 = list[0].mvp;
 int mvpIdx0 = list[0].mvpIdx;
-m_me.setMVP(mvp0);
-uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + 
m_me.bitcost(mvzero);
+uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, mvp0) 
+ m_me.bitcost(mvzero, mvp0);
 
 MV mvp1 = list[1].mvp;
 int mvpIdx1 = list[1].mvpIdx;
-m_me.setMVP(mvp1);
-uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + 
m_me.bitcost(mvzero);
+uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, mvp1) 
+ m_me.bitcost(mvzero, mvp1);
 
 uint32_t cost = satdCost + m_rdCost->getCost(bits0) + 
m_rdCost->getCost(bits1);
 
@@ -2556,9 +2552,8 @@
 {
 assert(amvpInfo->m_mvCand[outMvpIdx] == mvPred);
 
-m_me.setMVP(mvPred);
 int bestMvpIdx = outMvpIdx;
-int mvBitsOrig = m_me.bitcost(mv) + MVP_IDX_BITS;
+int mvBitsOrig = m_me.bitcost(mv, mvPred) + MVP_IDX_BITS;
 int bestMvBits = mvBitsOrig;
 
 for (int mvpIdx = 0; mvpIdx < AMVP_MAX_NUM_CANDS; mvpIdx++)
@@ 

Re: [x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate

2014-04-15 Thread Steve Borho
On Tue, Apr 15, 2014 at 6:30 AM, Min Chen chenm...@163.com wrote:
 # HG changeset patch
 # User Min Chen chenm...@163.com
 # Date 1397561438 -28800
 # Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a
 # Parent  1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb
 cleanup: reduce data size and dependency on MotionEstimate

 diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800
 @@ -111,8 +111,6 @@
  m_rdCost  = rdCost;

  initTempBuff(cfg->param->internalCsp);
 -m_me.setSearchMethod(cfg->param->searchMethod);
 -m_me.setSubpelRefine(cfg->param->subpelRefine);

  /* When frame parallelism is active, only 'refLagPixels' of reference 
 frames will be guaranteed
   * available for motion reference.  See refLagRows in 
 FrameEncoder::compressCTURows() */
 @@ -2191,7 +2189,7 @@
  cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = 
 m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx;

  motionCompensation(cu, m_predTempYuv, REF_PIC_LIST_X, puIdx, true, 
 false);
 -uint32_t costCand = 
 m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), 
 m_predTempYuv.getStride());
 +uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, 
 m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());
  uint32_t bitsCand = mergeCand + 1;
  if (mergeCand == m_cfg->param->maxNumMergeCand - 1)
  {
 @@ -2314,7 +2312,7 @@
  cu->clipMv(mvCand);

  xPredInterLumaBlk(cu, cu->getSlice()->getRefPic(l, 
 ref)->getPicYuvRec(), partAddr, mvCand, roiWidth, roiHeight, m_predTempYuv);
 -uint32_t cost = 
 m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
 +uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, 
 m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
  cost = m_rdCost->calcRdSADCost(cost, MVP_IDX_BITS);

  if (bestCost > cost)
 @@ -2328,11 +2326,11 @@

  int merange = m_cfg->param->searchRange;
  xSetSearchRange(cu, mvp, merange, mvmin, mvmax);
 -int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, 
 mvmax, mvp, numMvc, mvc, merange, outmv);
 +int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, 
 mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg->param->searchMethod, 
 m_cfg->param->subpelRefine);

  /* Get total cost of partition, but only include MV bit cost 
 once */
 -bits += m_me.bitcost(outmv);
 -uint32_t cost = (satdCost - m_me.mvcost(outmv)) + 
 m_rdCost->getCost(bits);
 +bits += m_me.bitcost(outmv, mvp);
 +uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + 
 m_rdCost->getCost(bits);

  /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
  xCheckBestMVP(amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, 
 cost);
 @@ -2368,7 +2366,7 @@

  int partEnum = partitionFromSizes(roiWidth, roiHeight);
  primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, 
 m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32);
 -int satdCost = m_me.bufSATD(avg, roiWidth);
 +int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);

  bidirBits = list[0].bits + list[1].bits + listSelBits[2] - 
 (listSelBits[0] + listSelBits[1]);
  bidirCost = satdCost + m_rdCost->getCost(bidirBits);
 @@ -2397,17 +2395,15 @@
  intptr_t refStride = m_mref[0][0]->lumaStride;

  primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, 
 refStride, ref1, refStride, 32);
 -satdCost = m_me.bufSATD(avg, roiWidth);
 +satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);

  MV mvp0 = list[0].mvp;
  int mvpIdx0 = list[0].mvpIdx;
 -m_me.setMVP(mvp0);
 -uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + 
 m_me.bitcost(mvzero);
 +uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, 
 mvp0) + m_me.bitcost(mvzero, mvp0);

  MV mvp1 = list[1].mvp;
  int mvpIdx1 = list[1].mvpIdx;
 -m_me.setMVP(mvp1);
 -uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + 
 m_me.bitcost(mvzero);
 +uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, 
 mvp1) + m_me.bitcost(mvzero, mvp1);

  uint32_t cost = satdCost + m_rdCost->getCost(bits0) + 
 m_rdCost->getCost(bits1);

 @@ -2556,9 +2552,8 @@
  {
  assert(amvpInfo->m_mvCand[outMvpIdx] == mvPred);

 -m_me.setMVP(mvPred);
  int bestMvpIdx = outMvpIdx;
 -int mvBitsOrig = m_me.bitcost(mv) + MVP_IDX_BITS;
 +int mvBitsOrig =