[x265] [PATCH] rc: write I/i slice in stats file based on whether openGop is enabled or not

2014-08-25 Thread aarthi
# HG changeset patch
# User Aarthi Thirumalai
# Date 1408946307 -19800
#  Mon Aug 25 11:28:27 2014 +0530
# Node ID c71cf2abe789b7bb7696201c9cff99c438109a0a
# Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
rc: write I/i slice in stats file based on whether openGop is enabled or not.

diff -r 6e6756f94b27 -r c71cf2abe789 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp   Fri Aug 22 15:53:34 2014 -0500
+++ b/source/encoder/ratecontrol.cpp   Mon Aug 25 11:28:27 2014 +0530
@@ -320,6 +320,7 @@
 m_startEndOrder.set(0);
 m_bTerminated = false;
 m_finalFrameCount = 0;
+m_numEntries = 0;
 if (m_param->rc.rateControlMode == X265_RC_CRF)
 {
 m_param->rc.qp = (int)m_param->rc.rfConstant;
@@ -1033,7 +1034,7 @@
 m_param->bframes = 1;
 return X265_TYPE_AUTO;
 }
-int frameType = m_rce2Pass[frameNum].sliceType == I_SLICE ? 
(m_rce2Pass[frameNum].poc == 0 ? X265_TYPE_I : X265_TYPE_IDR)
+int frameType = m_rce2Pass[frameNum].sliceType == I_SLICE ? (frameNum > 0 && m_param->bOpenGOP ? X265_TYPE_I : X265_TYPE_IDR)
 : m_rce2Pass[frameNum].sliceType == P_SLICE ? 
X265_TYPE_P
 : (m_rce2Pass[frameNum].sliceType == B_SLICE && 
m_rce2Pass[frameNum].keptAsRef? X265_TYPE_BREF : X265_TYPE_B);
 return frameType;
@@ -2080,7 +2081,7 @@
 // Write frame stats into the stats file if 2 pass is enabled.
 if (m_param->rc.bStatWrite)
 {
-char cType = rce->sliceType == I_SLICE ? (rce->poc == 0 ? 'I' : 'i')
+char cType = rce->sliceType == I_SLICE ? (rce->poc > 0 && 
m_param->bOpenGOP ? 'i' : 'I')
 : rce->sliceType == P_SLICE ? 'P'
 : IS_REFERENCED(slice) ? 'B' : 'b';
 if (fprintf(m_statFileOut,
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] rc: don't read slicetypes from qpfile in 2nd pass

2014-08-25 Thread aarthi
# HG changeset patch
# User Aarthi Thirumalai
# Date 1408948191 -19800
#  Mon Aug 25 11:59:51 2014 +0530
# Node ID 7d9c4d6ca117eebaa838756a0b3562b4e3630e18
# Parent  c71cf2abe789b7bb7696201c9cff99c438109a0a
rc: don't read slicetypes from qpfile in 2nd pass.

slicetype has to be taken from the stats file from prev pass.

diff -r c71cf2abe789 -r 7d9c4d6ca117 source/x265.cpp
--- a/source/x265.cpp   Mon Aug 25 11:28:27 2014 +0530
+++ b/source/x265.cpp   Mon Aug 25 11:59:51 2014 +0530
@@ -833,7 +833,7 @@
 while (pic_in && !b_ctrl_c)
 {
 pic_orig.poc = inFrameCount;
-if (cliopt.qpfile)
+if (cliopt.qpfile && !param->rc.bStatRead)
 {
 if (!cliopt.parseQPFile(pic_orig))
 {
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

2014-08-25 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1408956792 -32400
#  Mon Aug 25 17:53:12 2014 +0900
# Node ID 7145e57c722a94a06faec33e3041442032a1892f
# Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp  Fri Aug 22 15:53:34 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.cpp  Mon Aug 25 17:53:12 2014 +0900
@@ -816,12 +816,12 @@
 
 TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx, uint32_t 
curPartUnitIdx)
 {
+if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_zscanToPelX[curPartUnitIdx] + 
UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)
+return NULL;
+
 uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
 uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
-if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + 
UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)
-return NULL;
-
 if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, 
numPartInCUSize))
 {
 if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize))
@@ -857,14 +857,11 @@
 
 TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t 
curPartUnitIdx)
 {
-uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
+if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_zscanToPelY[curPartUnitIdx] + 
UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
+return NULL;
 
-if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + 
UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
-{
-return NULL;
-}
-
-uint32_t numPartInCUSize  = m_pic->getNumPartInCUSize();
+uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
+uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
 if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, 
numPartInCUSize))
 {
@@ -895,15 +892,14 @@
 
 TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx,  uint32_t 
curPartUnitIdx, uint32_t partUnitOffset)
 {
-uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
-
-if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + 
(partUnitOffset << LOG2_UNIT_SIZE)) >=
+if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_zscanToPelY[curPartUnitIdx] + 
(partUnitOffset << LOG2_UNIT_SIZE)) >=
 m_slice->m_sps->picHeightInLumaSamples)
 {
 return NULL;
 }
 
-uint32_t numPartInCUSize  = m_pic->getNumPartInCUSize();
+uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
+uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
 if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 
partUnitOffset, numPartInCUSize))
 {
@@ -938,14 +934,13 @@
 
 TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx, uint32_t 
curPartUnitIdx, uint32_t partUnitOffset)
 {
-uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
-
-if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + 
(partUnitOffset << LOG2_UNIT_SIZE)) >=
+if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_zscanToPelX[curPartUnitIdx] + 
(partUnitOffset << LOG2_UNIT_SIZE)) >=
 m_slice->m_sps->picWidthInLumaSamples)
 {
 return NULL;
 }
 
+uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
 uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
 if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 
partUnitOffset, numPartInCUSize))
@@ -954,7 +949,7 @@
 {
 if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - 
numPartInCUSize + partUnitOffset])
 {
-uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + (1 
<< (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
+uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 
<< (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize 
+ partUnitOffset];
 if (RasterAddress::isEqualRowOrCol(absPartIdxRT, 
absZorderCUIdx, numPartInCUSize))
 {
@@ -1817,48 +1812,42 @@
 }
 // TMVP always enabled
 {
-// MTK colocated-RightBottom
+MV colmv;
 uint32_t partIdxRB;
 
 deriveRightBottomIdx(puIdx, partIdxRB);
 
-uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB];
-uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
-
-MV colmv;
-int refIdx;
 int lcuIdx = -1;
 
-if ((m_pic->getCU(m_cuAddr)->getCUPelX() + 
g_rasterToPelX[absPartIdxTmp] + UNIT_SIZE) >= 
m_slice->m_sps->picWidthInLumaSamples)  // image boundary check
+// image boundary check
+if (m_pic->getCU(m_cuAddr)->getCUPelX() + g_zscanToPelX[partIdxRB] + 
UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
+m_pic->getCU(m_cuAddr)->getCUPelY() + 

Re: [x265] fix lossless

2014-08-25 Thread Steve Borho
On 08/25, Satoshi Nakagawa wrote:
 >  Negative shift or bts cause unexpected behavior.
 
 Negative shift is used for round value calculation.
 
 int round = 1 << (shift - 1);

Ah, we need this operation to be a copy; there shouldn't be any rounding.

 At 2014-08-25 10:06:23,Steve Borho st...@borho.org wrote:
 On 08/23, Satoshi Nakagawa wrote:
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com
  # Date 1408803114 -32400
  #  Sat Aug 23 23:11:54 2014 +0900
  # Node ID 218b9ddfe148c55afccc2c5a073858692e09f5c6
  # Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
  fix lossless
  
 do you have a repro case that this fixes?  I tried Main and Main10
 encodes with --lossless --hash 1 and they were both fine prior to this
 patch.
  
  diff -r 6e6756f94b27 -r 218b9ddfe148 source/common/quant.cpp
  --- a/source/common/quant.cpp   Fri Aug 22 15:53:34 2014 -0500
  +++ b/source/common/quant.cpp   Sat Aug 23 23:11:54 2014 +0900
  @@ -399,7 +399,7 @@
   {
   if (transQuantBypass)
   {
  -primitives.cvt32to16_shr(residual, coeff, stride, 0, 1 <<
 log2TrSize);
  +primitives.cvt32to16_shl[log2TrSize - 2](residual, coeff,
 stride, 0);
   return;
   }
   
  @@ -430,7 +430,7 @@
   #if X265_DEPTH <= 10
   primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride,
 shift, trSize);
   #else
  -if (shift >= 0)
  +if (shift > 0)
   primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride,
 shift, trSize);
   else
   primitives.cvt32to16_shl[log2TrSize - 2](residual,
 m_resiDctCoeff, stride, -shift);
  ___
  x265-devel mailing list
  x265-devel@videolan.org
  https://mailman.videolan.org/listinfo/x265-devel
  
 -- 
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] Removed redundant code

2014-08-25 Thread dtyx265
# HG changeset patch
# User David T Yuen dtyx...@gmail.com
# Date 1408983545 25200
# Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40
# Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
Removed redundant code

diff -r 6e6756f94b27 -r fa3c389b255b source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri Aug 22 15:53:34 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 09:19:05 2014 -0700
@@ -1537,34 +1537,6 @@
 }
 } // Mode loop
 
-// TODO: there is a lot of redundant work happening here, please clean 
this up!
-{
-uint32_t origMode = bestPUMode;
-
-cu->setLumaIntraDirSubParts(origMode, partOffset, depth + 
initTrDepth);
-
-// set context models
-m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
-
-// determine residual for partition
-uint32_t puDistY = 0;
-uint64_t puCost  = 0;
-xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, 
resiYuv, puDistY, false, puCost);
-
-// check r-d cost
-if (puCost < bestPUCost)
-{
-bestPUMode  = origMode;
-bestPUDistY = puDistY;
-
-xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);
-
-::memcpy(m_qtTempTrIdx,  cu->getTransformIdx() + 
partOffset, qPartNum * sizeof(uint8_t));
-::memcpy(m_qtTempCbf[0], cu->getCbf(TEXT_LUMA) + 
partOffset, qPartNum * sizeof(uint8_t));
-::memcpy(m_qtTempTransformSkipFlag[0], 
cu->getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t));
-}
-} // Mode loop
-
 //--- update overall distortion ---
 overallDistY += bestPUDistY;
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] rc: don't read slicetypes from qpfile in 2nd pass

2014-08-25 Thread Steve Borho
On 08/25, aar...@multicorewareinc.com wrote:
 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1408948191 -19800
 #  Mon Aug 25 11:59:51 2014 +0530
 # Node ID 7d9c4d6ca117eebaa838756a0b3562b4e3630e18
 # Parent  c71cf2abe789b7bb7696201c9cff99c438109a0a
 rc: don't read slicetypes from qpfile in 2nd pass.
 
 slicetype has to be taken from the stats file from prev pass.

This is essentially disabling the QP file entirely on the second pass,
which might be a perfectly reasonable thing to do, but if it is we
should probably issue some kind of warning saying the QP file is being
ignored.

 diff -r c71cf2abe789 -r 7d9c4d6ca117 source/x265.cpp
 --- a/source/x265.cpp Mon Aug 25 11:28:27 2014 +0530
 +++ b/source/x265.cpp Mon Aug 25 11:59:51 2014 +0530
 @@ -833,7 +833,7 @@
  while (pic_in && !b_ctrl_c)
  {
  pic_orig.poc = inFrameCount;
 -if (cliopt.qpfile)
 +if (cliopt.qpfile && !param->rc.bStatRead)
  {
  if (!cliopt.parseQPFile(pic_orig))
  {
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

2014-08-25 Thread Steve Borho
On 08/25, Satoshi Nakagawa wrote:
 # HG changeset patch
 # User Satoshi Nakagawa nakagawa...@oki.com
 # Date 1408956792 -32400
 #  Mon Aug 25 17:53:12 2014 +0900
 # Node ID 7145e57c722a94a06faec33e3041442032a1892f
 # Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
 replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

Queued for default, thanks.

There seems to be a lot of logic that checks for 'inside picture
bounds'. It seems like we could save a lot of CPU cycles if we padded
input pictures to the max-ctu size instead of the min-ctu size and
adjusted the conformance window accordingly.

 diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp
 --- a/source/Lib/TLibCommon/TComDataCU.cpp   Fri Aug 22 15:53:34 2014 -0500
 +++ b/source/Lib/TLibCommon/TComDataCU.cpp   Mon Aug 25 17:53:12 2014 +0900
 @@ -816,12 +816,12 @@
  
  TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx, uint32_t 
 curPartUnitIdx)
  {
 +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] 
 + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples)
 +return NULL;
 +
  uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
  uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
  
 -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] 
 + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples)
 -return NULL;
 -
  if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, 
 numPartInCUSize))
  {
  if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize))
 @@ -857,14 +857,11 @@
  
  TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t 
 curPartUnitIdx)
  {
 -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
 +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] 
 + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples)
 +return NULL;
  
 -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] 
 + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples)
 -{
 -return NULL;
 -}
 -
 -uint32_t numPartInCUSize  = m_pic-getNumPartInCUSize();
 +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
 +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
  
  if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, 
 numPartInCUSize))
  {
 @@ -895,15 +892,14 @@
  
  TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx,  uint32_t 
 curPartUnitIdx, uint32_t partUnitOffset)
  {
 -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
 -
 -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] 
 + (partUnitOffset  LOG2_UNIT_SIZE)) =
 +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] 
 + (partUnitOffset  LOG2_UNIT_SIZE)) =
  m_slice-m_sps-picHeightInLumaSamples)
  {
  return NULL;
  }
  
 -uint32_t numPartInCUSize  = m_pic-getNumPartInCUSize();
 +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
 +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
  
  if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 
 partUnitOffset, numPartInCUSize))
  {
 @@ -938,14 +934,13 @@
  
  TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx, uint32_t 
 curPartUnitIdx, uint32_t partUnitOffset)
  {
 -uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
 -
 -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] 
 + (partUnitOffset  LOG2_UNIT_SIZE)) =
 +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] 
 + (partUnitOffset  LOG2_UNIT_SIZE)) =
  m_slice-m_sps-picWidthInLumaSamples)
  {
  return NULL;
  }
  
 +uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
  uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
  
  if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 
 partUnitOffset, numPartInCUSize))
 @@ -954,7 +949,7 @@
  {
  if (curPartUnitIdx  g_rasterToZscan[absPartIdxRT - 
 numPartInCUSize + partUnitOffset])
  {
 -uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + 
 (1  (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
 +uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + 
 (1  (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
  arPartUnitIdx = g_rasterToZscan[absPartIdxRT - 
 numPartInCUSize + partUnitOffset];
  if (RasterAddress::isEqualRowOrCol(absPartIdxRT, 
 absZorderCUIdx, numPartInCUSize))
  {
 @@ -1817,48 +1812,42 @@
  }
  // TMVP always enabled
  {
 -// MTK colocated-RightBottom
 +MV colmv;
  uint32_t partIdxRB;
  
  deriveRightBottomIdx(puIdx, partIdxRB);
  
 -uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB];
 -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 -
 -   

[x265] [PATCH] Added check for mode 34

2014-08-25 Thread dtyx265
# HG changeset patch
# User David T Yuen dtyx...@gmail.com
# Date 1408987126 25200
# Node ID 157adb623151141d51c01bc085d563195f55e051
# Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
Added check for mode 34

For fast intra search in EstimateRow::estimateCUCost check intra mode 34

diff -r 6e6756f94b27 -r 157adb623151 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp  Fri Aug 22 15:53:34 2014 -0500
+++ b/source/encoder/slicetype.cpp  Mon Aug 25 10:18:46 2014 -0700
@@ -1722,6 +1722,11 @@
 cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * 
predsize], cuSize);
 COPY2_IF_LT(acost, cost, lowmode, mode);
 }
+if (lowmode == 33)
+{
+cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[34 * predsize], 
cuSize);
+COPY2_IF_LT(acost, cost, lowmode, mode);
+}
 if (acost < icost)
 icost = acost;
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Added check for mode 34

2014-08-25 Thread Steve Borho
On 08/25, dtyx...@gmail.com wrote:
 # HG changeset patch
 # User David T Yuen dtyx...@gmail.com
 # Date 1408987126 25200
 # Node ID 157adb623151141d51c01bc085d563195f55e051
 # Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
 Added check for mode 34

This was a deliberate omission on my part; missing one possible mode is
OK for lookahead, where we're just trying to estimate intra cost and we
don't even keep track of what the best mode was.

 For fast intra search in EstimateRow::estimateCUCost check intra mode 34
 
 diff -r 6e6756f94b27 -r 157adb623151 source/encoder/slicetype.cpp
 --- a/source/encoder/slicetype.cpp   Fri Aug 22 15:53:34 2014 -0500
 +++ b/source/encoder/slicetype.cpp   Mon Aug 25 10:18:46 2014 -0700
 @@ -1722,6 +1722,11 @@
  cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * 
 predsize], cuSize);
  COPY2_IF_LT(acost, cost, lowmode, mode);
  }
 +if (lowmode == 33)
 +{
 +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[34 * 
 predsize], cuSize);
 +COPY2_IF_LT(acost, cost, lowmode, mode);
 +}
  if (acost < icost)
  icost = acost;
  
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Removed redundant code

2014-08-25 Thread Steve Borho
On 08/25, dtyx...@gmail.com wrote:
 # HG changeset patch
 # User David T Yuen dtyx...@gmail.com
 # Date 1408983545 25200
 # Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40
 # Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
 Removed redundant code

Queued, thanks

 diff -r 6e6756f94b27 -r fa3c389b255b source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp   Fri Aug 22 15:53:34 2014 -0500
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp   Mon Aug 25 09:19:05 2014 -0700
 @@ -1537,34 +1537,6 @@
  }
  } // Mode loop
  
 -// TODO: there is a lot of redundant work happening here, please 
 clean this up!
 -{
 -uint32_t origMode = bestPUMode;
 -
 -cu-setLumaIntraDirSubParts(origMode, partOffset, depth + 
 initTrDepth);
 -
 -// set context models
 -m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
 -
 -// determine residual for partition
 -uint32_t puDistY = 0;
 -uint64_t puCost  = 0;
 -xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, 
 predYuv, resiYuv, puDistY, false, puCost);
 -
 -// check r-d cost
 -if (puCost  bestPUCost)
 -{
 -bestPUMode  = origMode;
 -bestPUDistY = puDistY;
 -
 -xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);
 -
 -::memcpy(m_qtTempTrIdx,  cu-getTransformIdx() + 
 partOffset, qPartNum * sizeof(uint8_t));
 -::memcpy(m_qtTempCbf[0], cu-getCbf(TEXT_LUMA) + 
 partOffset, qPartNum * sizeof(uint8_t));
 -::memcpy(m_qtTempTransformSkipFlag[0], 
 cu-getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t));
 -}
 -} // Mode loop
 -
  //--- update overall distortion ---
  overallDistY += bestPUDistY;
  
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Removed redundant code

2014-08-25 Thread Steve Borho
On 08/25, dtyx...@gmail.com wrote:
 # HG changeset patch
 # User David T Yuen dtyx...@gmail.com
 # Date 1408983545 25200
 # Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40
 # Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
 Removed redundant code

If I do an encode with --preset slower, the outputs change with this
patch. Is this intentional?

 diff -r 6e6756f94b27 -r fa3c389b255b source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp   Fri Aug 22 15:53:34 2014 -0500
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp   Mon Aug 25 09:19:05 2014 -0700
 @@ -1537,34 +1537,6 @@
  }
  } // Mode loop
  
 -// TODO: there is a lot of redundant work happening here, please 
 clean this up!
 -{
 -uint32_t origMode = bestPUMode;
 -
 -cu-setLumaIntraDirSubParts(origMode, partOffset, depth + 
 initTrDepth);
 -
 -// set context models
 -m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
 -
 -// determine residual for partition
 -uint32_t puDistY = 0;
 -uint64_t puCost  = 0;
 -xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, 
 predYuv, resiYuv, puDistY, false, puCost);

I suspect some of the bits up to here are perhaps required unless the
last mode tried was the best mode.

 -// check r-d cost
 -if (puCost  bestPUCost)
 -{
 -bestPUMode  = origMode;
 -bestPUDistY = puDistY;
 -
 -xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);
 -
 -::memcpy(m_qtTempTrIdx,  cu-getTransformIdx() + 
 partOffset, qPartNum * sizeof(uint8_t));
 -::memcpy(m_qtTempCbf[0], cu-getCbf(TEXT_LUMA) + 
 partOffset, qPartNum * sizeof(uint8_t));
 -::memcpy(m_qtTempTransformSkipFlag[0], 
 cu-getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t));
 -}
 -} // Mode loop
 -
  //--- update overall distortion ---
  overallDistY += bestPUDistY;
  
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Removed redundant code

2014-08-25 Thread dave

On 08/25/2014 01:36 PM, Steve Borho wrote:

On 08/25, dtyx...@gmail.com wrote:

# HG changeset patch
# User David T Yuen dtyx...@gmail.com
# Date 1408983545 25200
# Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40
# Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
Removed redundant code

if I do an encode with --preset slower, the outputs change with this
patch. is this intentional?

It was not.  I only tested on a few vids with -I 1 to force intra 
prediction.  It produced bit-for-bit identical results in less time.  I 
will look into --preset slower differences.



diff -r 6e6756f94b27 -r fa3c389b255b source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri Aug 22 15:53:34 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 09:19:05 2014 -0700
@@ -1537,34 +1537,6 @@
  }
  } // Mode loop
  
-// TODO: there is a lot of redundant work happening here, please clean this up!

-{
-uint32_t origMode = bestPUMode;
-
-cu-setLumaIntraDirSubParts(origMode, partOffset, depth + 
initTrDepth);
-
-// set context models
-m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
-
-// determine residual for partition
-uint32_t puDistY = 0;
-uint64_t puCost  = 0;
-xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, 
resiYuv, puDistY, false, puCost);

I suspect some of the bits up to here are perhaps required unless the
last mode tried was the best mode.
Yes, xRecurIntraCodingQT is where the most work is done by this chunk of 
code so the difference is probably somewhere in there.



-// check r-d cost
-if (puCost  bestPUCost)
-{
-bestPUMode  = origMode;
-bestPUDistY = puDistY;
-
-xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);
-
-::memcpy(m_qtTempTrIdx,  cu-getTransformIdx() + 
partOffset, qPartNum * sizeof(uint8_t));
-::memcpy(m_qtTempCbf[0], cu-getCbf(TEXT_LUMA) + 
partOffset, qPartNum * sizeof(uint8_t));
-::memcpy(m_qtTempTransformSkipFlag[0], 
cu-getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t));
-}
-} // Mode loop
-
  //--- update overall distortion ---
  overallDistY += bestPUDistY;
  
___

x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel
Is there any test script available that covers at least a good basic set 
of testcases?

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless

2014-08-25 Thread Steve Borho
# HG changeset patch
# User Min Chen chenm...@163.com
# Date 1409002891 18000
#  Mon Aug 25 16:41:31 2014 -0500
# Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264
# Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
analysis: fix inter hash mistake with --cu-lossless

diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 -0500
@@ -2293,7 +2293,7 @@
  * \returns void
  */
 void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, 
TComYuv* predYuv, ShortYuv* outResiYuv,
-   ShortYuv* outBestResiYuv, TComYuv* 
outReconYuv)
+   ShortYuv* outBestResiYuv, TComYuv* 
outReconYuv, TComDataCU* tmpCu)
 {
 X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
 
@@ -2321,6 +2321,7 @@
 }
 
 uint64_t bestCost = MAX_INT64;
+bool bestTransquantBypassFlag = bIsTQBypassEnable;
 
 for (uint32_t modeId = 0; modeId < numModes; modeId++)
 {
@@ -2388,15 +2389,29 @@
 if (cu->getQtRootCbf(0))
 xSetResidualQTData(cu, 0, outBestResiYuv, depth, true);
 
+bestTransquantBypassFlag = bIsLosslessMode;
 bestBits = bits;
 bestCost = cost;
 bestCoeffBits = cu->m_coeffBits;
 m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
 }
+
+// Save lossless mode coeff
+if (bIsLosslessMode)
+{
+tmpCu->copyPartFrom(cu, 0, depth, false);
+}
 }
 
 X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
 
+if (bestTransquantBypassFlag && !m_param->bLossless)
+{
+assert(log2CUSize > 2);
+cu->setCUTransquantBypassSubParts(true, 0, depth);
+cu->copyPartFrom(tmpCu, 0, depth, false);
+}
+
 if (cu->getQtRootCbf(0))
 outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
 else
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h   Mon Aug 25 17:53:12 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.h   Mon Aug 25 16:41:31 2014 -0500
@@ -147,7 +147,7 @@
 
 /// encode residual and compute rd-cost for inter mode
 void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* 
predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,
-   TComYuv* reconYuv);
+   TComYuv* reconYuv, TComDataCU* tmpCu);
 void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* 
predYuv, TComYuv* reconYuv);
 
 void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t 
absPartIdx, TComYuv* fencYuv,
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Mon Aug 25 17:53:12 2014 +0900
+++ b/source/encoder/analysis.cpp   Mon Aug 25 16:41:31 2014 -0500
@@ -82,7 +82,7 @@
 uint32_t sizeL = cuSize * cuSize;
 uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));
 
-ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, 
tqBypass);
+ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9, 
tqBypass);
 
 m_interCU_2Nx2N[i]  = new TComDataCU;
 m_interCU_2Nx2N[i]->create(m_memPool[i], numPartitions, cuSize, csp, 
0, tqBypass);
@@ -108,6 +108,9 @@
 m_tempCU[i] = new TComDataCU;
 m_tempCU[i]->create(m_memPool[i], numPartitions, cuSize, csp, 7, 
tqBypass);
 
+m_tempLosslessCU[i] = new TComDataCU;
+m_tempLosslessCU[i]->create(m_memPool[i], numPartitions, cuSize, csp, 
8, tqBypass);
+
 m_bestPredYuv[i] = new TComYuv;
 ok = m_bestPredYuv[i]->create(cuSize, cuSize, csp);
 
@@ -158,6 +161,7 @@
 delete m_bestMergeCU[i];
 delete m_bestCU[i];
 delete m_tempCU[i];
+delete m_tempLosslessCU[i];
 
 if (m_bestPredYuv && m_bestPredYuv[i])
 {
@@ -240,6 +244,7 @@
 // initialize CU data
 m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
 m_tempCU[0]->initCU(cu->m_pic, cu->getAddr());
+m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr());
 
 // analysis of CU
 uint32_t numPartition = cu->getTotalNumPart();
@@ -394,6 +399,7 @@
 uint32_t    nextDepth = depth + 1;
 TComDataCU* subBestPartCU = m_bestCU[nextDepth];
 TComDataCU* subTempPartCU = m_tempCU[nextDepth];
+TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
 for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
 {
 int qp = outTempCU->getQP(0);
@@ -404,6 +410,7 @@
  (subBestPartCU->getCUPelY() < 
slice->m_sps->picHeightInLumaSamples)))
 {
 subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, 
qp); // clear sub partition datas or init.
+

[x265] [PATCH 3 of 3] encoder: re-enable --cu-lossless

2014-08-25 Thread Steve Borho
# HG changeset patch
# User Steve Borho st...@borho.org
# Date 1409004465 18000
#  Mon Aug 25 17:07:45 2014 -0500
# Node ID 3aa688b03a0b71af6dfde9feb6b0d83178388fd1
# Parent  7e5709385119bb636abd2d198e40a243f1c2491f
encoder: re-enable --cu-lossless

diff -r 7e5709385119 -r 3aa688b03a0b source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp   Mon Aug 25 16:41:36 2014 -0500
+++ b/source/encoder/encoder.cpp   Mon Aug 25 17:07:45 2014 -0500
@@ -1244,11 +1244,6 @@
 {
 x265_log(p, X265_LOG_INFO, "Warning: picture-based SAO used with frame 
parallelism\n");
 }
-if (p->bCULossless)
-{
-x265_log(p, X265_LOG_WARNING, "CU-Lossless is disabled in this release 
of x265\n");
-p->bCULossless = 0;
-}
 
 if (p->keyframeMax < 0)
 {
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 2 of 3] search: fix decoder intra crash with --cu-lossless

2014-08-25 Thread Steve Borho
# HG changeset patch
# User Min Chen chenm...@163.com
# Date 1409002896 18000
#  Mon Aug 25 16:41:36 2014 -0500
# Node ID 7e5709385119bb636abd2d198e40a243f1c2491f
# Parent  0bf2756898bc78e5660a6b607b2f3cda97834264
search: fix decoder intra crash with --cu-lossless

diff -r 0bf2756898bc -r 7e5709385119 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:36 2014 -0500
@@ -572,6 +572,10 @@
 
 bool checkTQbypass = cu->m_slice->m_pps->bTransquantBypassEnabled && 
!m_param->bLossless;
 
+// NOTE: transform_quant_bypass just at cu level
+if ((cu->m_slice->m_pps->bTransquantBypassEnabled) && 
cu->getCUTransquantBypass(0) != checkTQbypass)
+checkTQbypass = cu->getCUTransquantBypass(0) && 
!m_param->bLossless;
+
 uint32_t stride = fencYuv->getStride();
 pixel*   pred   = predYuv->getLumaAddr(absPartIdx);
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Removed redundant code

2014-08-25 Thread Steve Borho
On 08/25, dave wrote:
> On 08/25/2014 01:36 PM, Steve Borho wrote:
>> On 08/25, dtyx...@gmail.com wrote:
>>> # HG changeset patch
>>> # User David T Yuen dtyx...@gmail.com
>>> # Date 1408983545 25200
>>> # Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40
>>> # Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
>>> Removed redundant code
>> If I do an encode with --preset slower, the outputs change with this
>> patch. Is this intentional?
> It was not.  I only tested on a few vids with -I 1 to force intra
> prediction.  It produced bit-for-bit identical results in less time.  I will
> look into --preset slower differences.

I see. This particular function is only used for RD levels 4, 5, and 6.

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

2014-08-25 Thread Deepthi Nandakumar
Ashok is already working on pre-calculating these inside-picture flags
along with more refactors. After his refactors are in, we can check whether
padding will improve performance.

In fact, very likely he already has a local version of the logic in this
patch.


On Mon, Aug 25, 2014 at 10:46 PM, Steve Borho st...@borho.org wrote:

 On 08/25, Satoshi Nakagawa wrote:
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com
  # Date 1408956792 -32400
  #  Mon Aug 25 17:53:12 2014 +0900
  # Node ID 7145e57c722a94a06faec33e3041442032a1892f
  # Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
  replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

 Queued for default, thanks.

 There seems to be a lot of logic that checks for 'inside picture
 bounds'. It seems like we could save a lot of CPU cycles if we padded
 input pictures to the max-ctu size instead of the min-ctu size and
 adjusted the conformance window accordingly.

  diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp
  --- a/source/Lib/TLibCommon/TComDataCU.cpp Fri Aug 22 15:53:34 2014
 -0500
  +++ b/source/Lib/TLibCommon/TComDataCU.cpp Mon Aug 25 17:53:12 2014
 +0900
  @@ -816,12 +816,12 @@
 
   TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx,
 uint32_t curPartUnitIdx)
   {
  +if ((m_pic->getCU(m_cuAddr)->getCUPelX() +
 g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >=
 m_slice->m_sps->picWidthInLumaSamples)
  +return NULL;
  +
   uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
   uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
  -if ((m_pic->getCU(m_cuAddr)->getCUPelX() +
 g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) >=
 m_slice->m_sps->picWidthInLumaSamples)
  -return NULL;
  -
   if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1,
 numPartInCUSize))
   {
   if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize))
  @@ -857,14 +857,11 @@
 
   TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx,
 uint32_t curPartUnitIdx)
   {
  -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
  +if ((m_pic->getCU(m_cuAddr)->getCUPelY() +
 g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >=
 m_slice->m_sps->picHeightInLumaSamples)
  +return NULL;
 
  -if ((m_pic->getCU(m_cuAddr)->getCUPelY() +
 g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) >=
 m_slice->m_sps->picHeightInLumaSamples)
  -{
  -return NULL;
  -}
  -
  -uint32_t numPartInCUSize  = m_pic->getNumPartInCUSize();
  +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
  +uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
   if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1,
 numPartInCUSize))
   {
  @@ -895,15 +892,14 @@
 
   TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx,
 uint32_t curPartUnitIdx, uint32_t partUnitOffset)
   {
  -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
  -
  -if ((m_pic->getCU(m_cuAddr)->getCUPelY() +
 g_rasterToPelY[absPartIdxLB] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
  +if ((m_pic->getCU(m_cuAddr)->getCUPelY() +
 g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
   m_slice->m_sps->picHeightInLumaSamples)
   {
   return NULL;
   }
 
  -uint32_t numPartInCUSize  = m_pic->getNumPartInCUSize();
  +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
  +uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
   if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize -
 partUnitOffset, numPartInCUSize))
   {
  @@ -938,14 +934,13 @@
 
   TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx,
 uint32_t curPartUnitIdx, uint32_t partUnitOffset)
   {
  -uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
  -
  -if ((m_pic->getCU(m_cuAddr)->getCUPelX() +
 g_rasterToPelX[absPartIdxRT] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
  +if ((m_pic->getCU(m_cuAddr)->getCUPelX() +
 g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
   m_slice->m_sps->picWidthInLumaSamples)
   {
   return NULL;
   }
 
  +uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
   uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
   if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize -
 partUnitOffset, numPartInCUSize))
  @@ -954,7 +949,7 @@
   {
   if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT -
 numPartInCUSize + partUnitOffset])
   {
  -uint32_t absZorderCUIdx  =
 g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE))
 - 1;
  +uint32_t absZorderCUIdx =
 g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE))
 - 1;
   arPartUnitIdx = g_rasterToZscan[absPartIdxRT -
 numPartInCUSize + partUnitOffset];
   if (RasterAddress::isEqualRowOrCol(absPartIdxRT,