[x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit

2013-11-07 Thread sumalatha
# HG changeset patch
# User Sumalatha Polureddy
# Date 1383823751 -19800
# Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075
# Parent  0a1b379be359cbcf76140ac392104c856a037c78
no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early 
exit

Early exit is done when the CU cost at depth n is less than the sum of 60% of
the average cost of all CUs and 40% of the average cost of the neighbour CUs
at the same depth.

diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp
--- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800
+++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530
@@ -69,6 +69,14 @@
 m_ssimCnt = 0;
 m_frameTime = 0.0;
 m_elapsedCompressTime = 0.0;
+m_avgCost[0] = 0;
+m_avgCost[1] = 0;
+m_avgCost[2] = 0;
+m_avgCost[3] = 0;
+m_count[0] = 0;
+m_count[1] = 0;
+m_count[2] = 0;
+m_count[3] = 0;
 }
 
 TComPic::~TComPic()
diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h
--- a/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 18:17:52 2013 +0800
+++ b/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 16:59:11 2013 +0530
@@ -95,6 +95,8 @@
 MD5Contextm_state[3];
 uint32_t  m_crc[3];
 uint32_t  m_checksum[3];
+UInt64m_avgCost[4];
+uint32_t  m_count[4];
 
 /* SSIM values per frame */
 doublem_ssim;
diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp   Thu Nov 07 18:17:52 2013 +0800
+++ b/source/encoder/compress.cpp   Thu Nov 07 16:59:11 2013 +0530
@@ -567,13 +567,14 @@
 if (bSubBranch  bTrySplitDQP  depth  g_maxCUDepth - g_addCUDepth)
 {
 #if EARLY_EXIT // turn ON this to enable early exit
-// early exit when the RD cost of best mode at depth n is less than 
the avgerage of RD cost of the
-// CU's(above, aboveleft, aboveright, left, colocated) at depth n of 
previosuly coded CU's
+// early exit when the RD cost of best mode at depth n is less than 
the sum of avgerage of RD cost of the neighbour 
+// CU's(above, aboveleft, aboveright, left, colocated) and all CU's at 
depth n  with weightage for each quantity
 if (outBestCU != 0)
 {
-UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, 
costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 
0, totalCost = 0, avgCost = 0;
+UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, 
costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 
0, totalCostNeigh = 0, totalCostAll = 0;
+double avgCost = 0;
 UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0, 
countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, 
countCUColocated1 = 0;
-UInt64 totalCount = 0;
+UInt64 totalCountNeigh = 0, totalCountAll = 0;
 TComDataCU* above = outTempCU-getCUAbove();
 TComDataCU* aboveLeft = outTempCU-getCUAboveLeft();
 TComDataCU* aboveRight = outTempCU-getCUAboveRight();
@@ -614,10 +615,15 @@
 countCUColocated1 = colocated1-m_count[depth];
 }
 
-totalCost = costCU + costCUAbove + costCUAboveLeft + 
costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
-totalCount = countCU + countCUAbove + countCUAboveLeft + 
countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
-if (totalCount != 0)
-avgCost = totalCost / totalCount;
+totalCostNeigh = costCU + costCUAbove + costCUAboveLeft + 
costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
+totalCountNeigh = countCU + countCUAbove + countCUAboveLeft + 
countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
+
+totalCostAll = (outTempCU-getPic()-m_avgCost[depth] * 
outTempCU-getPic()-m_count[depth]) - totalCostNeigh;
+totalCountAll = outTempCU-getPic()-m_count[depth] - 
totalCountNeigh;
+
+//giving 60% weight to all CU's and 40% weight to neighbour CU's
+if (totalCountAll)
+avgCost = ((0.6 * totalCostAll) + (0.4 * totalCostNeigh)) / 
((0.6 * totalCountAll) + (0.4 * totalCountNeigh));
 
 float lambda = 1.0f;
 
@@ -672,6 +678,9 @@
 
outTempCU-getPic()-getPicSym()-getCU(outTempCU-getAddr())-m_count[depth + 
1] += 1;
 outTempCU-m_avgCost[depth + 1] = (temp + tempavgCost) / 
outTempCU-m_count[depth + 1];
 
outTempCU-getPic()-getPicSym()-getCU(outTempCU-getAddr())-m_avgCost[depth 
+ 1] = outTempCU-m_avgCost[depth + 1];
+temp = outTempCU-getPic()-m_avgCost[depth+1] * 
outTempCU-getPic()-m_count[depth+1];
+outTempCU-getPic()-m_count[depth+1] += 1;
+outTempCU-getPic()-m_avgCost[depth+1] = (temp + 
tempavgCost) / 

Re: [x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit

2013-11-07 Thread Steve Borho
On Thu, Nov 7, 2013 at 5:29 AM, sumala...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Sumalatha Polureddy
 # Date 1383823751 -19800
 # Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075
 # Parent  0a1b379be359cbcf76140ac392104c856a037c78
 no-rdo: giving weightage to the cost of all CU's and neighbour CU's for
 early exit

 Early exit is done when CU cost at depth n is lessthan sum of 60% of
 avgcost of all CU's
 and 40% of avgcost of neighbour CU's at same depth.

 diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp
 --- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530
 @@ -69,6 +69,14 @@
  m_ssimCnt = 0;
  m_frameTime = 0.0;
  m_elapsedCompressTime = 0.0;
 +m_avgCost[0] = 0;
 +m_avgCost[1] = 0;
 +m_avgCost[2] = 0;
 +m_avgCost[3] = 0;
 +m_count[0] = 0;
 +m_count[1] = 0;
 +m_count[2] = 0;
 +m_count[3] = 0;
  }

  TComPic::~TComPic()
 diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h
 --- a/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 16:59:11 2013 +0530
 @@ -95,6 +95,8 @@
  MD5Contextm_state[3];
  uint32_t  m_crc[3];
  uint32_t  m_checksum[3];
 +UInt64m_avgCost[4];


we should be using uint64_t for new code that we write


 +uint32_t  m_count[4];

  /* SSIM values per frame */
  doublem_ssim;
 diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/encoder/compress.cpp   Thu Nov 07 16:59:11 2013 +0530
 @@ -567,13 +567,14 @@
  if (bSubBranch  bTrySplitDQP  depth  g_maxCUDepth - g_addCUDepth)
  {
  #if EARLY_EXIT // turn ON this to enable early exit
 -// early exit when the RD cost of best mode at depth n is less
 than the avgerage of RD cost of the
 -// CU's(above, aboveleft, aboveright, left, colocated) at depth
 n of previosuly coded CU's
 +// early exit when the RD cost of best mode at depth n is less
 than the sum of avgerage of RD cost of the neighbour
 +// CU's(above, aboveleft, aboveright, left, colocated) and all
 CU's at depth n  with weightage for each quantity
  if (outBestCU != 0)
  {
 -UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0,
 costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0,
 costCUColocated1 = 0, totalCost = 0, avgCost = 0;
 +UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0,
 costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0,
 costCUColocated1 = 0, totalCostNeigh = 0, totalCostAll = 0;
 +double avgCost = 0;
  UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0,
 countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0,
 countCUColocated1 = 0;
 -UInt64 totalCount = 0;
 +UInt64 totalCountNeigh = 0, totalCountAll = 0;


Ditto for these (use uint64_t), and these lines should be broken up at around 100 chars.


  TComDataCU* above = outTempCU-getCUAbove();
  TComDataCU* aboveLeft = outTempCU-getCUAboveLeft();
  TComDataCU* aboveRight = outTempCU-getCUAboveRight();
 @@ -614,10 +615,15 @@
  countCUColocated1 = colocated1-m_count[depth];
  }

 -totalCost = costCU + costCUAbove + costCUAboveLeft +
 costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
 -totalCount = countCU + countCUAbove + countCUAboveLeft +
 countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
 -if (totalCount != 0)
 -avgCost = totalCost / totalCount;
 +totalCostNeigh = costCU + costCUAbove + costCUAboveLeft +
 costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
 +totalCountNeigh = countCU + countCUAbove + countCUAboveLeft +
 countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;


Do we need all these counters? It looks like we could just accumulate into
totalCost and totalCount within each if() clause.


 +
 +totalCostAll = (outTempCU-getPic()-m_avgCost[depth] *
 outTempCU-getPic()-m_count[depth]) - totalCostNeigh;
 +totalCountAll = outTempCU-getPic()-m_count[depth] -
 totalCountNeigh;
 +
 +//giving 60% weight to all CU's and 40% weight to neighbour
 CU's
 +if (totalCountAll)
 +avgCost = ((0.6 * totalCostAll) + (0.4 * totalCostNeigh))
 / ((0.6 * totalCountAll) + (0.4 * totalCountNeigh));

  float lambda = 1.0f;

 @@ -672,6 +678,9 @@

  outTempCU-getPic()-getPicSym()-getCU(outTempCU-getAddr())-m_count[depth
 + 1] += 1;
  outTempCU-m_avgCost[depth + 1] = (temp +
 tempavgCost) / outTempCU-m_count[depth + 1];

  
 

Re: [x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit

2013-11-07 Thread Deepthi Nandakumar
I have a few questions.

1. Do we need so many local variables?

2. Why are we adding outTempCU->cost to totalCost and then comparing
against outBestCU->cost? That doesn't make much sense to me. AFAIK,
outTempCU does not contain any valid data - we should remove this.

3. Should we be adding costCUColocated0 and costCUColocated1 also? Adding
up spatial and temporal costs, and then comparing against a threshold
derived from spatial costs - umm, no. Let's leave these out.

4. The rest of it looks ok, logically. But now you may need to re-tune this
with different weights.

Best,
Deepthi









On Thu, Nov 7, 2013 at 4:59 PM, sumala...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Sumalatha Polureddy
 # Date 1383823751 -19800
 # Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075
 # Parent  0a1b379be359cbcf76140ac392104c856a037c78
 no-rdo: giving weightage to the cost of all CU's and neighbour CU's for
 early exit

 Early exit is done when CU cost at depth n is lessthan sum of 60% of
 avgcost of all CU's
 and 40% of avgcost of neighbour CU's at same depth.

 diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp
 --- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530
 @@ -69,6 +69,14 @@
  m_ssimCnt = 0;
  m_frameTime = 0.0;
  m_elapsedCompressTime = 0.0;
 +m_avgCost[0] = 0;
 +m_avgCost[1] = 0;
 +m_avgCost[2] = 0;
 +m_avgCost[3] = 0;
 +m_count[0] = 0;
 +m_count[1] = 0;
 +m_count[2] = 0;
 +m_count[3] = 0;
  }

  TComPic::~TComPic()
 diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h
 --- a/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 16:59:11 2013 +0530
 @@ -95,6 +95,8 @@
  MD5Contextm_state[3];
  uint32_t  m_crc[3];
  uint32_t  m_checksum[3];
 +UInt64m_avgCost[4];
 +uint32_t  m_count[4];

  /* SSIM values per frame */
  doublem_ssim;
 diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/encoder/compress.cpp   Thu Nov 07 16:59:11 2013 +0530
 @@ -567,13 +567,14 @@
  if (bSubBranch  bTrySplitDQP  depth  g_maxCUDepth - g_addCUDepth)
  {
  #if EARLY_EXIT // turn ON this to enable early exit
 -// early exit when the RD cost of best mode at depth n is less
 than the avgerage of RD cost of the
 -// CU's(above, aboveleft, aboveright, left, colocated) at depth
 n of previosuly coded CU's
 +// early exit when the RD cost of best mode at depth n is less
 than the sum of avgerage of RD cost of the neighbour
 +// CU's(above, aboveleft, aboveright, left, colocated) and all
 CU's at depth n  with weightage for each quantity
  if (outBestCU != 0)
  {
 -UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0,
 costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0,
 costCUColocated1 = 0, totalCost = 0, avgCost = 0;
 +UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0,
 costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0,
 costCUColocated1 = 0, totalCostNeigh = 0, totalCostAll = 0;
 +double avgCost = 0;
  UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0,
 countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0,
 countCUColocated1 = 0;
 -UInt64 totalCount = 0;
 +UInt64 totalCountNeigh = 0, totalCountAll = 0;
  TComDataCU* above = outTempCU-getCUAbove();
  TComDataCU* aboveLeft = outTempCU-getCUAboveLeft();
  TComDataCU* aboveRight = outTempCU-getCUAboveRight();
 @@ -614,10 +615,15 @@
  countCUColocated1 = colocated1-m_count[depth];
  }

 -totalCost = costCU + costCUAbove + costCUAboveLeft +
 costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
 -totalCount = countCU + countCUAbove + countCUAboveLeft +
 countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
 -if (totalCount != 0)
 -avgCost = totalCost / totalCount;
 +totalCostNeigh = costCU + costCUAbove + costCUAboveLeft +
 costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
 +totalCountNeigh = countCU + countCUAbove + countCUAboveLeft +
 countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
 +
 +totalCostAll = (outTempCU-getPic()-m_avgCost[depth] *
 outTempCU-getPic()-m_count[depth]) - totalCostNeigh;
 +totalCountAll = outTempCU-getPic()-m_count[depth] -
 totalCountNeigh;
 +
 +//giving 60% weight to all CU's and 40% weight to neighbour
 CU's
 +if (totalCountAll)
 +avgCost = ((0.6 * totalCostAll) + (0.4 * 

Re: [x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit

2013-11-07 Thread Sumalatha Polureddy
On Fri, Nov 8, 2013 at 8:03 AM, Deepthi Nandakumar 
deep...@multicorewareinc.com wrote:

 I have a few questions.

 1. Do we need so many local variables?

As Steve suggested, totalCostNeigh and totalCountNeigh can be accumulated
inside each if () clause. I will try to remove costCU, costCUAbove,
costCUAboveLeft, costCUAboveRight, costCULeft, costCUColocated0,
costCUColocated1 and their corresponding count variables as well.


 2. Why are we adding outTempCU-cost to totalCost and then comparing
 against outBestCU-cost? That doesnt make much sense to me. AFAIk,
 outTempCU does not contain any valid data - we should remove this.

Actually, we are taking outTempCU->getPic()->m_avgCost[depth] into the total
cost and comparing it with outBestCU->cost.
outTempCU->getPic()->m_avgCost[depth] holds the average cost of all the CUs
encoded so far in the frame.



 3. Should we be adding costCUColocated0 and costCUColocated1 also? Adding
 up spatial and temporal costs, and then comparing against a threshold
 derived from spatial costs - umm, no. Lets leave these out.

I will remove both colocated0 and colocated1.



 4. The rest of it looks ok, logically. But now you may need to re-tune
 this with different weights.

 Best,
 Deepthi









 On Thu, Nov 7, 2013 at 4:59 PM, sumala...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Sumalatha Polureddy
 # Date 1383823751 -19800
 # Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075
 # Parent  0a1b379be359cbcf76140ac392104c856a037c78
 no-rdo: giving weightage to the cost of all CU's and neighbour CU's for
 early exit

 Early exit is done when CU cost at depth n is lessthan sum of 60% of
 avgcost of all CU's
 and 40% of avgcost of neighbour CU's at same depth.

 diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp
 --- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530
 @@ -69,6 +69,14 @@
  m_ssimCnt = 0;
  m_frameTime = 0.0;
  m_elapsedCompressTime = 0.0;
 +m_avgCost[0] = 0;
 +m_avgCost[1] = 0;
 +m_avgCost[2] = 0;
 +m_avgCost[3] = 0;
 +m_count[0] = 0;
 +m_count[1] = 0;
 +m_count[2] = 0;
 +m_count[3] = 0;
  }

  TComPic::~TComPic()
 diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h
 --- a/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 16:59:11 2013 +0530
 @@ -95,6 +95,8 @@
  MD5Contextm_state[3];
  uint32_t  m_crc[3];
  uint32_t  m_checksum[3];
 +UInt64m_avgCost[4];
 +uint32_t  m_count[4];

  /* SSIM values per frame */
  doublem_ssim;
 diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/encoder/compress.cpp   Thu Nov 07 16:59:11 2013 +0530
 @@ -567,13 +567,14 @@
  if (bSubBranch  bTrySplitDQP  depth  g_maxCUDepth -
 g_addCUDepth)
  {
  #if EARLY_EXIT // turn ON this to enable early exit
 -// early exit when the RD cost of best mode at depth n is less
 than the avgerage of RD cost of the
 -// CU's(above, aboveleft, aboveright, left, colocated) at depth
 n of previosuly coded CU's
 +// early exit when the RD cost of best mode at depth n is less
 than the sum of avgerage of RD cost of the neighbour
 +// CU's(above, aboveleft, aboveright, left, colocated) and all
 CU's at depth n  with weightage for each quantity
  if (outBestCU != 0)
  {
 -UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0,
 costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0,
 costCUColocated1 = 0, totalCost = 0, avgCost = 0;
 +UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0,
 costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0,
 costCUColocated1 = 0, totalCostNeigh = 0, totalCostAll = 0;
 +double avgCost = 0;
  UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0,
 countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0,
 countCUColocated1 = 0;
 -UInt64 totalCount = 0;
 +UInt64 totalCountNeigh = 0, totalCountAll = 0;
  TComDataCU* above = outTempCU-getCUAbove();
  TComDataCU* aboveLeft = outTempCU-getCUAboveLeft();
  TComDataCU* aboveRight = outTempCU-getCUAboveRight();
 @@ -614,10 +615,15 @@
  countCUColocated1 = colocated1-m_count[depth];
  }

 -totalCost = costCU + costCUAbove + costCUAboveLeft +
 costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
 -totalCount = countCU + countCUAbove + countCUAboveLeft +
 countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
 -if (totalCount != 0)
 -avgCost = totalCost / totalCount;
 +totalCostNeigh = costCU + costCUAbove