Re: [x265] [PATCH RFC] analysis: add CU specific details to encodeCU()

2014-09-14 Thread Deepthi Nandakumar
On Fri, Sep 12, 2014 at 6:05 PM, santhosh...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Santhoshini Sekar santhosh...@multicorewareinc.com
 # Date 1410525310 -19800
 #  Fri Sep 12 18:05:10 2014 +0530
 # Node ID bf4ebe5df0cab013e4462597b55bd505b2a6a71a
 # Parent  7e29b10982d2eb7fd79f581d6f04184522ba
 analysis: add CU specific details to encodeCU()

 diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/analysis.cpp   Fri Sep 12 18:05:10 2014 +0530
 @@ -301,7 +301,6 @@
  {
  if (cu-m_slice-m_pps-bUseDQP)
  m_bEncodeDQP = true;
 -loadCTUData(cu);

  // initialize CU data
  m_bestCU[0]-initCU(cu-m_pic, cu-getAddr());
 diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/entropy.cpp
 --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/entropy.cppFri Sep 12 18:05:10 2014 +0530
 @@ -481,14 +481,14 @@
  }
  }

 -void Entropy::encodeCTU(TComDataCU* cu)
 +void Entropy::encodeCTU(TComDataCU* cu, CU* cuData)
  {
  bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP;
 -encodeCU(cu, 0, 0, false, bEncodeDQP);
 +encodeCU(cu, 0, 0, bEncodeDQP, cuData);
  }

  /* encode a CU block recursively */
 -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t
 depth, bool bInsidePicture, bool bEncodeDQP)
 +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t
 depth, bool bEncodeDQP, CU* cuData)
  {
  Frame* pic = cu-m_pic;
  Slice* slice = cu-m_slice;
 @@ -496,30 +496,26 @@
  if (depth = slice-m_pps-maxCuDQPDepth  slice-m_pps-bUseDQP)
  bEncodeDQP = true;

 -if (!bInsidePicture)
 +int cu_split_flag = !(cuData-flags  CU::LEAF);
 +int cu_unsplit_flag = !(cuData-flags  CU::SPLIT_MANDATORY);
 +
 +uint32_t xmax = slice-m_sps-picWidthInLumaSamples  -
 cu-getCUPelX();
 +uint32_t ymax = slice-m_sps-picHeightInLumaSamples -
 cu-getCUPelY();
 +
 +if (!cu_unsplit_flag)
  {
 -uint32_t xmax = slice-m_sps-picWidthInLumaSamples  -
 cu-getCUPelX();
 -uint32_t ymax = slice-m_sps-picHeightInLumaSamples -
 cu-getCUPelY();
 -uint32_t cuSize = g_maxCUSize  depth;
 -
 -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax 
 -  g_zscanToPelY[absPartIdx] + cuSize = ymax);
 -
 -if (!bInsidePicture)
 +uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1))  2;
 +for (uint32_t partUnitIdx = 0; partUnitIdx  4; partUnitIdx++,
 absPartIdx += qNumParts)
  {
 -uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1))
  2;
 -for (uint32_t partUnitIdx = 0; partUnitIdx  4;
 partUnitIdx++, absPartIdx += qNumParts)
 -{
 -if (g_zscanToPelX[absPartIdx]  xmax 
 g_zscanToPelY[absPartIdx]  ymax)
 -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture,
 bEncodeDQP);
 -}
 -
 -return;
 +CU *childCU = cu-m_CULocalData + cuData-childIdx +
 partUnitIdx;
 +if (g_zscanToPelX[absPartIdx]  xmax 
 g_zscanToPelY[absPartIdx]  ymax)


Most of the patch looks correct, but can't the above if-check also be replaced
by one of the childCU flags?


 +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU);
  }
 +return;
  }

  // We need to split, so don't try these modes.
 -if (bInsidePicture  depth  g_maxCUDepth)
 +if (cu_split_flag)
  codeSplitFlag(cu, absPartIdx, depth);

  if (depth  cu-getDepth(absPartIdx)  depth  g_maxCUDepth)
 @@ -527,7 +523,10 @@
  uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1))  2;

  for (uint32_t partUnitIdx = 0; partUnitIdx  4; partUnitIdx++,
 absPartIdx += qNumParts)
 -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture,
 bEncodeDQP);
 +{
 +CU *childCU = cu-m_CULocalData + cuData-childIdx +
 partUnitIdx;
 +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU);
 +}
  return;
  }

 diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/entropy.h
 --- a/source/encoder/entropy.h  Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/entropy.h  Fri Sep 12 18:05:10 2014 +0530
 @@ -148,7 +148,7 @@
  void codeShortTermRefPicSet(RPS* rps);
  void finishSlice() { encodeBinTrm(1); finish();
 dynamic_castBitstream*(m_bitIf)-writeByteAlignment(); }

 -void encodeCTU(TComDataCU* cu);
 +void encodeCTU(TComDataCU* cu, CU *cuData);
  void codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
  void codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int
 ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int
 allowMergeLeft, int allowMergeUp);
  void codeSaoMerge(uint32_t code)   { encodeBin(code,
 m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
 @@ -193,7 +193,7 @@
  void 

Re: [x265] [PATCH] Search: remove redundant encode coefficients in intra for performance

2014-09-14 Thread Deepthi Nandakumar
This significantly changes outputs for P and B frames: higher bitrates and
higher SSIM. Let's do full regression testing on this, and compare the
bitrate/SSIM for all combinations to be reasonably sure there are no bugs.

On Fri, Sep 12, 2014 at 7:47 PM, as...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Ashok Kumar Mishra as...@multicorewareinc.com
 # Date 1410341620 -19800
 #  Wed Sep 10 15:03:40 2014 +0530
 # Node ID d8be3c38915d4a628b804522da8946a152041203
 # Parent  cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f
 Search: remove redundant encode coefficients in intra for performance

 diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Thu Sep 11 17:25:40 2014 -0700
 +++ b/source/encoder/analysis.cpp   Wed Sep 10 15:03:40 2014 +0530
 @@ -1840,6 +1840,7 @@
  void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv,
 TComYuv* predYuv,  ShortYuv* outResiYuv, TComYuv* outReconYuv)
  {
  uint64_t puCost = 0;
 +uint32_t puBits = 0;
  uint32_t depth = cu-getDepth(0);
  uint32_t initTrDepth = cu-getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;

 @@ -1851,7 +1852,7 @@
  uint32_t tuDepthRange[2];
  cu-getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);

 -uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
 predYuv, outResiYuv, false, puCost, tuDepthRange);
 +uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
 predYuv, outResiYuv, false, puCost, puBits, tuDepthRange);
  xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv);

  //=== update PU data 
 diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp
 --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700
 +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530
 @@ -111,47 +111,6 @@
  return false;
  }

 -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t depthRange[2])
 -{
 -uint32_t fullDepth  = cu-getDepth(0) + trDepth;
 -uint32_t trMode = cu-getTransformIdx(absPartIdx);
 -uint32_t subdiv = (trMode  trDepth ? 1 : 0);
 -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
 -
 -if (cu-getPredictionMode(0) == MODE_INTRA  cu-getPartitionSize(0)
 == SIZE_NxN  trDepth == 0)
 -{
 -X265_CHECK(subdiv, subdivision not present\n);
 -}
 -else if (log2TrSize  *(depthRange + 1))
 -{
 -X265_CHECK(subdiv, subdivision not present\n);
 -}
 -else if (log2TrSize == cu-m_slice-m_sps-quadtreeTULog2MinSize)
 -{
 -X265_CHECK(!subdiv, subdivision present\n);
 -}
 -else if (log2TrSize == *depthRange)
 -{
 -X265_CHECK(!subdiv, subdivision present\n);
 -}
 -else
 -{
 -X265_CHECK(log2TrSize  *depthRange, transform size too
 small\n);
 -m_entropyCoder-codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
 -}
 -
 -if (subdiv)
 -{
 -uint32_t qtPartNum = cu-m_pic-getNumPartInCU()  ((fullDepth +
 1)  1);
 -for (uint32_t part = 0; part  4; part++)
 -xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part *
 qtPartNum, depthRange);
 -
 -return;
 -}
 -
 -m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
 -}
 -
  void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t
 height)
  {
  uint32_t fullDepth  = cu-getDepth(0) + trDepth;
 @@ -183,32 +142,6 @@
  }
  }

 -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t
 absPartIdx)
 -{
 -const TextType ttype = TEXT_LUMA;
 -
 -if (!cu-getCbf(absPartIdx, ttype, trDepth))
 -return;
 -
 -uint32_t fullDepth = cu-getDepth(0) + trDepth;
 -uint32_t trMode= cu-getTransformIdx(absPartIdx);
 -
 -if (trMode  trDepth)
 -{
 -uint32_t qtPartNum = cu-m_pic-getNumPartInCU()  ((fullDepth +
 1)  1);
 -for (uint32_t part = 0; part  4; part++)
 -xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part *
 qtPartNum);
 -
 -return;
 -}
 -
 -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
 -uint32_t qtLayer= log2TrSize - 2;
 -uint32_t coeffOffset = absPartIdx  LOG2_UNIT_SIZE * 2;
 -coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
 -m_entropyCoder-codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize,
 ttype);
 -}
 -
  void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t
 absPartIdx, TextType ttype)
  {
  if (!cu-getCbf(absPartIdx, ttype, trDepth))
 @@ -316,15 +249,6 @@
  }
  }

 -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t depthRange[2])
 -{
 -m_entropyCoder-resetBits();
 -xEncIntraHeaderLuma(cu, trDepth, absPartIdx);
 -xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange);
 -xEncCoeffQTLuma(cu, trDepth, absPartIdx);
 -return