Re: [x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless

2014-08-26 Thread Deepthi Nandakumar
Thanks, Min. This is a solution, but the extra TComDataCU* will affect
performance. I have sent another patch that simply re-encodes the CU if
lossless is chosen as the best mode; this will not affect normal analysis.
Can you review that?

Deepthi


On Tue, Aug 26, 2014 at 3:47 AM, Steve Borho st...@borho.org wrote:

 # HG changeset patch
 # User Min Chen chenm...@163.com
 # Date 1409002891 18000
 #  Mon Aug 25 16:41:31 2014 -0500
 # Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264
 # Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
 analysis: fix inter hash mistake with --cu-lossless

 diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014
 +0900
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014
 -0500
 @@ -2293,7 +2293,7 @@
   * \returns void
   */
  void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv*
 fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv,
 -   ShortYuv* outBestResiYuv,
 TComYuv* outReconYuv)
 +   ShortYuv* outBestResiYuv,
 TComYuv* outReconYuv, TComDataCU* tmpCu)
  {
  X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");

 @@ -2321,6 +2321,7 @@
  }

  uint64_t bestCost = MAX_INT64;
 +bool bestTransquantBypassFlag = bIsTQBypassEnable;

  for (uint32_t modeId = 0; modeId < numModes; modeId++)
  {
 @@ -2388,15 +2389,29 @@
  if (cu->getQtRootCbf(0))
  xSetResidualQTData(cu, 0, outBestResiYuv, depth, true);

 +bestTransquantBypassFlag = bIsLosslessMode;
  bestBits = bits;
  bestCost = cost;
  bestCoeffBits = cu->m_coeffBits;
  m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
  }
 +
 +// Save lossless mode coeff
 +if (bIsLosslessMode)
 +{
 +tmpCu->copyPartFrom(cu, 0, depth, false);
 +}
  }

  X265_CHECK(bestCost != MAX_INT64, "no best cost\n");

 +if (bestTransquantBypassFlag && !m_param->bLossless)
 +{
 +assert(log2CUSize  2);
 +cu->setCUTransquantBypassSubParts(true, 0, depth);
 +cu->copyPartFrom(tmpCu, 0, depth, false);
 +}
 +
  if (cu->getQtRootCbf(0))
  outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
  else
 diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h
 --- a/source/Lib/TLibEncoder/TEncSearch.h   Mon Aug 25 17:53:12 2014
 +0900
 +++ b/source/Lib/TLibEncoder/TEncSearch.h   Mon Aug 25 16:41:31 2014
 -0500
 @@ -147,7 +147,7 @@

  /// encode residual and compute rd-cost for inter mode
  void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv,
 TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,
 -   TComYuv* reconYuv);
 +   TComYuv* reconYuv, TComDataCU* tmpCu);
  void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv,
 TComYuv* predYuv, TComYuv* reconYuv);

  void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t
 absPartIdx, TComYuv* fencYuv,
 diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Mon Aug 25 17:53:12 2014 +0900
 +++ b/source/encoder/analysis.cpp   Mon Aug 25 16:41:31 2014 -0500
 @@ -82,7 +82,7 @@
  uint32_t sizeL = cuSize * cuSize;
  uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) +
 CHROMA_V_SHIFT(csp));

 -ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8,
 tqBypass);
 +ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9,
 tqBypass);

  m_interCU_2Nx2N[i]  = new TComDataCU;
  m_interCU_2Nx2N[i]->create(m_memPool[i], numPartitions, cuSize,
 csp, 0, tqBypass);
 @@ -108,6 +108,9 @@
  m_tempCU[i] = new TComDataCU;
  m_tempCU[i]->create(m_memPool[i], numPartitions, cuSize, csp, 7,
 tqBypass);

 +m_tempLosslessCU[i] = new TComDataCU;
 +m_tempLosslessCU[i]->create(m_memPool[i], numPartitions, cuSize,
 csp, 8, tqBypass);
 +
  m_bestPredYuv[i] = new TComYuv;
  ok = m_bestPredYuv[i]->create(cuSize, cuSize, csp);

 @@ -158,6 +161,7 @@
  delete m_bestMergeCU[i];
  delete m_bestCU[i];
  delete m_tempCU[i];
 +delete m_tempLosslessCU[i];

  if (m_bestPredYuv && m_bestPredYuv[i])
  {
 @@ -240,6 +244,7 @@
  // initialize CU data
  m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
  m_tempCU[0]->initCU(cu->m_pic, cu->getAddr());
 +m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr());

  // analysis of CU
  uint32_t numPartition = cu->getTotalNumPart();
 @@ -394,6 +399,7 @@
  uint32_t nextDepth = depth + 1;
  TComDataCU* subBestPartCU = m_bestCU[nextDepth];
  TComDataCU* subTempPartCU = m_tempCU[nextDepth];
 +TComDataCU* 

[x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless

2014-08-25 Thread Steve Borho
# HG changeset patch
# User Min Chen chenm...@163.com
# Date 1409002891 18000
#  Mon Aug 25 16:41:31 2014 -0500
# Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264
# Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
analysis: fix inter hash mistake with --cu-lossless

diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 -0500
@@ -2293,7 +2293,7 @@
  * \returns void
  */
 void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, 
TComYuv* predYuv, ShortYuv* outResiYuv,
-   ShortYuv* outBestResiYuv, TComYuv* 
outReconYuv)
+   ShortYuv* outBestResiYuv, TComYuv* 
outReconYuv, TComDataCU* tmpCu)
 {
 X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
 
@@ -2321,6 +2321,7 @@
 }
 
 uint64_t bestCost = MAX_INT64;
+bool bestTransquantBypassFlag = bIsTQBypassEnable;
 
 for (uint32_t modeId = 0; modeId < numModes; modeId++)
 {
@@ -2388,15 +2389,29 @@
 if (cu->getQtRootCbf(0))
 xSetResidualQTData(cu, 0, outBestResiYuv, depth, true);
 
+bestTransquantBypassFlag = bIsLosslessMode;
 bestBits = bits;
 bestCost = cost;
 bestCoeffBits = cu->m_coeffBits;
 m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
 }
+
+// Save lossless mode coeff
+if (bIsLosslessMode)
+{
+tmpCu->copyPartFrom(cu, 0, depth, false);
+}
 }
 
 X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
 
+if (bestTransquantBypassFlag && !m_param->bLossless)
+{
+assert(log2CUSize  2);
+cu->setCUTransquantBypassSubParts(true, 0, depth);
+cu->copyPartFrom(tmpCu, 0, depth, false);
+}
+
 if (cu->getQtRootCbf(0))
 outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
 else
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h   Mon Aug 25 17:53:12 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.h   Mon Aug 25 16:41:31 2014 -0500
@@ -147,7 +147,7 @@
 
 /// encode residual and compute rd-cost for inter mode
 void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* 
predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,
-   TComYuv* reconYuv);
+   TComYuv* reconYuv, TComDataCU* tmpCu);
 void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* 
predYuv, TComYuv* reconYuv);
 
 void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t 
absPartIdx, TComYuv* fencYuv,
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Mon Aug 25 17:53:12 2014 +0900
+++ b/source/encoder/analysis.cpp   Mon Aug 25 16:41:31 2014 -0500
@@ -82,7 +82,7 @@
 uint32_t sizeL = cuSize * cuSize;
 uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));
 
-ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, 
tqBypass);
+ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9, 
tqBypass);
 
 m_interCU_2Nx2N[i]  = new TComDataCU;
 m_interCU_2Nx2N[i]->create(m_memPool[i], numPartitions, cuSize, csp, 
0, tqBypass);
@@ -108,6 +108,9 @@
 m_tempCU[i] = new TComDataCU;
 m_tempCU[i]->create(m_memPool[i], numPartitions, cuSize, csp, 7, 
tqBypass);
 
+m_tempLosslessCU[i] = new TComDataCU;
+m_tempLosslessCU[i]->create(m_memPool[i], numPartitions, cuSize, csp, 
8, tqBypass);
+
 m_bestPredYuv[i] = new TComYuv;
 ok = m_bestPredYuv[i]->create(cuSize, cuSize, csp);
 
@@ -158,6 +161,7 @@
 delete m_bestMergeCU[i];
 delete m_bestCU[i];
 delete m_tempCU[i];
+delete m_tempLosslessCU[i];
 
 if (m_bestPredYuv && m_bestPredYuv[i])
 {
@@ -240,6 +244,7 @@
 // initialize CU data
 m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
 m_tempCU[0]->initCU(cu->m_pic, cu->getAddr());
+m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr());
 
 // analysis of CU
 uint32_t numPartition = cu->getTotalNumPart();
@@ -394,6 +399,7 @@
 uint32_t nextDepth = depth + 1;
 TComDataCU* subBestPartCU = m_bestCU[nextDepth];
 TComDataCU* subTempPartCU = m_tempCU[nextDepth];
+TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
 for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
 {
 int qp = outTempCU->getQP(0);
@@ -404,6 +410,7 @@
  (subBestPartCU->getCUPelY() < 
slice->m_sps->picHeightInLumaSamples)))
 {
 subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, 
qp); // clear sub partition datas or init.
+