[x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit
# HG changeset patch # User Sumalatha Polureddy # Date 1383823751 -19800 # Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075 # Parent 0a1b379be359cbcf76140ac392104c856a037c78 no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit Early exit is done when CU cost at depth n is lessthan sum of 60% of avgcost of all CU's and 40% of avgcost of neighbour CU's at same depth. diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp --- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -69,6 +69,14 @@ m_ssimCnt = 0; m_frameTime = 0.0; m_elapsedCompressTime = 0.0; +m_avgCost[0] = 0; +m_avgCost[1] = 0; +m_avgCost[2] = 0; +m_avgCost[3] = 0; +m_count[0] = 0; +m_count[1] = 0; +m_count[2] = 0; +m_count[3] = 0; } TComPic::~TComPic() diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h --- a/source/Lib/TLibCommon/TComPic.h Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.h Thu Nov 07 16:59:11 2013 +0530 @@ -95,6 +95,8 @@ MD5Contextm_state[3]; uint32_t m_crc[3]; uint32_t m_checksum[3]; +UInt64m_avgCost[4]; +uint32_t m_count[4]; /* SSIM values per frame */ doublem_ssim; diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/encoder/compress.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -567,13 +567,14 @@ if (bSubBranch bTrySplitDQP depth g_maxCUDepth - g_addCUDepth) { #if EARLY_EXIT // turn ON this to enable early exit -// early exit when the RD cost of best mode at depth n is less than the avgerage of RD cost of the -// CU's(above, aboveleft, aboveright, left, colocated) at depth n of previosuly coded CU's +// early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour +// CU's(above, aboveleft, aboveright, left, colocated) and all CU's at depth n with weightage for each quantity if 
(outBestCU != 0) { -UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCost = 0, avgCost = 0; +UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCostNeigh = 0, totalCostAll = 0; +double avgCost = 0; UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0, countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, countCUColocated1 = 0; -UInt64 totalCount = 0; +UInt64 totalCountNeigh = 0, totalCountAll = 0; TComDataCU* above = outTempCU-getCUAbove(); TComDataCU* aboveLeft = outTempCU-getCUAboveLeft(); TComDataCU* aboveRight = outTempCU-getCUAboveRight(); @@ -614,10 +615,15 @@ countCUColocated1 = colocated1-m_count[depth]; } -totalCost = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1; -totalCount = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1; -if (totalCount != 0) -avgCost = totalCost / totalCount; +totalCostNeigh = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1; +totalCountNeigh = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1; + +totalCostAll = (outTempCU-getPic()-m_avgCost[depth] * outTempCU-getPic()-m_count[depth]) - totalCostNeigh; +totalCountAll = outTempCU-getPic()-m_count[depth] - totalCountNeigh; + +//giving 60% weight to all CU's and 40% weight to neighbour CU's +if (totalCountAll) +avgCost = ((0.6 * totalCostAll) + (0.4 * totalCostNeigh)) / ((0.6 * totalCountAll) + (0.4 * totalCountNeigh)); float lambda = 1.0f; @@ -672,6 +678,9 @@ outTempCU-getPic()-getPicSym()-getCU(outTempCU-getAddr())-m_count[depth + 1] += 1; outTempCU-m_avgCost[depth + 1] = (temp + tempavgCost) / outTempCU-m_count[depth + 1]; 
outTempCU-getPic()-getPicSym()-getCU(outTempCU-getAddr())-m_avgCost[depth + 1] = outTempCU-m_avgCost[depth + 1]; +temp = outTempCU-getPic()-m_avgCost[depth+1] * outTempCU-getPic()-m_count[depth+1]; +outTempCU-getPic()-m_count[depth+1] += 1; +outTempCU-getPic()-m_avgCost[depth+1] = (temp + tempavgCost) /
Re: [x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit
On Thu, Nov 7, 2013 at 5:29 AM, sumala...@multicorewareinc.com wrote: # HG changeset patch # User Sumalatha Polureddy # Date 1383823751 -19800 # Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075 # Parent 0a1b379be359cbcf76140ac392104c856a037c78 no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit Early exit is done when CU cost at depth n is lessthan sum of 60% of avgcost of all CU's and 40% of avgcost of neighbour CU's at same depth. diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp --- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -69,6 +69,14 @@ m_ssimCnt = 0; m_frameTime = 0.0; m_elapsedCompressTime = 0.0; +m_avgCost[0] = 0; +m_avgCost[1] = 0; +m_avgCost[2] = 0; +m_avgCost[3] = 0; +m_count[0] = 0; +m_count[1] = 0; +m_count[2] = 0; +m_count[3] = 0; } TComPic::~TComPic() diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h --- a/source/Lib/TLibCommon/TComPic.h Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.h Thu Nov 07 16:59:11 2013 +0530 @@ -95,6 +95,8 @@ MD5Contextm_state[3]; uint32_t m_crc[3]; uint32_t m_checksum[3]; +UInt64m_avgCost[4]; we should be using uint64_t for new code that we write +uint32_t m_count[4]; /* SSIM values per frame */ doublem_ssim; diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/encoder/compress.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -567,13 +567,14 @@ if (bSubBranch bTrySplitDQP depth g_maxCUDepth - g_addCUDepth) { #if EARLY_EXIT // turn ON this to enable early exit -// early exit when the RD cost of best mode at depth n is less than the avgerage of RD cost of the -// CU's(above, aboveleft, aboveright, left, colocated) at depth n of previosuly coded CU's +// early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the 
neighbour +// CU's(above, aboveleft, aboveright, left, colocated) and all CU's at depth n with weightage for each quantity if (outBestCU != 0) { -UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCost = 0, avgCost = 0; +UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCostNeigh = 0, totalCostAll = 0; +double avgCost = 0; UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0, countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, countCUColocated1 = 0; -UInt64 totalCount = 0; +UInt64 totalCountNeigh = 0, totalCountAll = 0; ditto for these, and these lines should be broken up at around 100 chars TComDataCU* above = outTempCU-getCUAbove(); TComDataCU* aboveLeft = outTempCU-getCUAboveLeft(); TComDataCU* aboveRight = outTempCU-getCUAboveRight(); @@ -614,10 +615,15 @@ countCUColocated1 = colocated1-m_count[depth]; } -totalCost = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1; -totalCount = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1; -if (totalCount != 0) -avgCost = totalCost / totalCount; +totalCostNeigh = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1; +totalCountNeigh = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1; do we need all these counters? It looks like we could just increment totalCost and totalCount within each if() clause? 
+ +totalCostAll = (outTempCU-getPic()-m_avgCost[depth] * outTempCU-getPic()-m_count[depth]) - totalCostNeigh; +totalCountAll = outTempCU-getPic()-m_count[depth] - totalCountNeigh; + +//giving 60% weight to all CU's and 40% weight to neighbour CU's +if (totalCountAll) +avgCost = ((0.6 * totalCostAll) + (0.4 * totalCostNeigh)) / ((0.6 * totalCountAll) + (0.4 * totalCountNeigh)); float lambda = 1.0f; @@ -672,6 +678,9 @@ outTempCU-getPic()-getPicSym()-getCU(outTempCU-getAddr())-m_count[depth + 1] += 1; outTempCU-m_avgCost[depth + 1] = (temp + tempavgCost) / outTempCU-m_count[depth + 1];
Re: [x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit
I have a few questions. 1. Do we need so many local variables? 2. Why are we adding outTempCU->cost to totalCost and then comparing against outBestCU->cost? That doesn't make much sense to me. AFAIK, outTempCU does not contain any valid data - we should remove this. 3. Should we be adding costCUColocated0 and costCUColocated1 also? Adding up spatial and temporal costs, and then comparing against a threshold derived from spatial costs - umm, no. Let's leave these out. 4. The rest of it looks ok, logically. But now you may need to re-tune this with different weights. Best, Deepthi On Thu, Nov 7, 2013 at 4:59 PM, sumala...@multicorewareinc.com wrote: # HG changeset patch # User Sumalatha Polureddy # Date 1383823751 -19800 # Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075 # Parent 0a1b379be359cbcf76140ac392104c856a037c78 no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit Early exit is done when CU cost at depth n is lessthan sum of 60% of avgcost of all CU's and 40% of avgcost of neighbour CU's at same depth. 
diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp --- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -69,6 +69,14 @@ m_ssimCnt = 0; m_frameTime = 0.0; m_elapsedCompressTime = 0.0; +m_avgCost[0] = 0; +m_avgCost[1] = 0; +m_avgCost[2] = 0; +m_avgCost[3] = 0; +m_count[0] = 0; +m_count[1] = 0; +m_count[2] = 0; +m_count[3] = 0; } TComPic::~TComPic() diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h --- a/source/Lib/TLibCommon/TComPic.h Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.h Thu Nov 07 16:59:11 2013 +0530 @@ -95,6 +95,8 @@ MD5Contextm_state[3]; uint32_t m_crc[3]; uint32_t m_checksum[3]; +UInt64m_avgCost[4]; +uint32_t m_count[4]; /* SSIM values per frame */ doublem_ssim; diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/encoder/compress.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -567,13 +567,14 @@ if (bSubBranch bTrySplitDQP depth g_maxCUDepth - g_addCUDepth) { #if EARLY_EXIT // turn ON this to enable early exit -// early exit when the RD cost of best mode at depth n is less than the avgerage of RD cost of the -// CU's(above, aboveleft, aboveright, left, colocated) at depth n of previosuly coded CU's +// early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour +// CU's(above, aboveleft, aboveright, left, colocated) and all CU's at depth n with weightage for each quantity if (outBestCU != 0) { -UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCost = 0, avgCost = 0; +UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCostNeigh = 0, totalCostAll = 0; +double avgCost = 0; UInt64 countCU = 
0, countCUAbove = 0, countCUAboveLeft = 0, countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, countCUColocated1 = 0; -UInt64 totalCount = 0; +UInt64 totalCountNeigh = 0, totalCountAll = 0; TComDataCU* above = outTempCU-getCUAbove(); TComDataCU* aboveLeft = outTempCU-getCUAboveLeft(); TComDataCU* aboveRight = outTempCU-getCUAboveRight(); @@ -614,10 +615,15 @@ countCUColocated1 = colocated1-m_count[depth]; } -totalCost = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1; -totalCount = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1; -if (totalCount != 0) -avgCost = totalCost / totalCount; +totalCostNeigh = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1; +totalCountNeigh = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1; + +totalCostAll = (outTempCU-getPic()-m_avgCost[depth] * outTempCU-getPic()-m_count[depth]) - totalCostNeigh; +totalCountAll = outTempCU-getPic()-m_count[depth] - totalCountNeigh; + +//giving 60% weight to all CU's and 40% weight to neighbour CU's +if (totalCountAll) +avgCost = ((0.6 * totalCostAll) + (0.4 *
Re: [x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit
On Fri, Nov 8, 2013 at 8:03 AM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: I have a few questions. 1. Do we need so many local variables? As Steve suggested, totalCostNeigh and totalCountNeigh can be accumulated inside each if () block, and I will try to remove costCU, costCUAbove, costCUAboveLeft, costCUAboveRight, costCULeft, costCUColocated0, costCUColocated1 and their corresponding count variables as well. 2. Why are we adding outTempCU->cost to totalCost and then comparing against outBestCU->cost? That doesn't make much sense to me. AFAIK, outTempCU does not contain any valid data - we should remove this. Actually, we are taking outTempCU->getPic()->m_avgCost[depth] into the total cost and comparing it with outBestCU->cost. outTempCU->getPic()->m_avgCost[depth] holds the average cost of all the CUs encoded so far in the frame. 3. Should we be adding costCUColocated0 and costCUColocated1 also? Adding up spatial and temporal costs, and then comparing against a threshold derived from spatial costs - umm, no. Let's leave these out. I will remove both colocated0 and colocated1. 4. The rest of it looks ok, logically. But now you may need to re-tune this with different weights. Best, Deepthi On Thu, Nov 7, 2013 at 4:59 PM, sumala...@multicorewareinc.com wrote: # HG changeset patch # User Sumalatha Polureddy # Date 1383823751 -19800 # Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075 # Parent 0a1b379be359cbcf76140ac392104c856a037c78 no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit Early exit is done when CU cost at depth n is lessthan sum of 60% of avgcost of all CU's and 40% of avgcost of neighbour CU's at same depth. 
diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp --- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -69,6 +69,14 @@ m_ssimCnt = 0; m_frameTime = 0.0; m_elapsedCompressTime = 0.0; +m_avgCost[0] = 0; +m_avgCost[1] = 0; +m_avgCost[2] = 0; +m_avgCost[3] = 0; +m_count[0] = 0; +m_count[1] = 0; +m_count[2] = 0; +m_count[3] = 0; } TComPic::~TComPic() diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h --- a/source/Lib/TLibCommon/TComPic.h Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.h Thu Nov 07 16:59:11 2013 +0530 @@ -95,6 +95,8 @@ MD5Contextm_state[3]; uint32_t m_crc[3]; uint32_t m_checksum[3]; +UInt64m_avgCost[4]; +uint32_t m_count[4]; /* SSIM values per frame */ doublem_ssim; diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/encoder/compress.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -567,13 +567,14 @@ if (bSubBranch bTrySplitDQP depth g_maxCUDepth - g_addCUDepth) { #if EARLY_EXIT // turn ON this to enable early exit -// early exit when the RD cost of best mode at depth n is less than the avgerage of RD cost of the -// CU's(above, aboveleft, aboveright, left, colocated) at depth n of previosuly coded CU's +// early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour +// CU's(above, aboveleft, aboveright, left, colocated) and all CU's at depth n with weightage for each quantity if (outBestCU != 0) { -UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCost = 0, avgCost = 0; +UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCostNeigh = 0, totalCostAll = 0; +double avgCost = 0; UInt64 countCU = 
0, countCUAbove = 0, countCUAboveLeft = 0, countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, countCUColocated1 = 0; -UInt64 totalCount = 0; +UInt64 totalCountNeigh = 0, totalCountAll = 0; TComDataCU* above = outTempCU-getCUAbove(); TComDataCU* aboveLeft = outTempCU-getCUAboveLeft(); TComDataCU* aboveRight = outTempCU-getCUAboveRight(); @@ -614,10 +615,15 @@ countCUColocated1 = colocated1-m_count[depth]; } -totalCost = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1; -totalCount = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1; -if (totalCount != 0) -avgCost = totalCost / totalCount; +totalCostNeigh = costCU + costCUAbove