[x265] [PATCH 6 of 6] vbv: integrate row level vbv ratecontrol at each major row diagonal

2014-02-20 Thread aarthi
# HG changeset patch
# User Aarthi Thirumalai
# Date 1392901254 -19800
#  Thu Feb 20 18:30:54 2014 +0530
# Node ID 650d5f835e417f45bd8a9f86465ca1909eaa9526
# Parent  49b90667f050a7dd9c28b5017f389f5c29a1c191
vbv: integrate row level vbv ratecontrol at each major row diagonal.

diff -r 49b90667f050 -r 650d5f835e41 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Thu Feb 20 18:23:48 2014 +0530
+++ b/source/encoder/frameencoder.cpp   Thu Feb 20 18:30:54 2014 +0530
@@ -1057,6 +1057,7 @@
 CTURow& codeRow = m_rows[m_cfg->param.bEnableWavefront ? row : 0];
 const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
 const uint32_t lineStartCUAddr = row * numCols;
+double qpBase = m_pic->m_avgQpRc;
 for (uint32_t col = curRow.m_completed; col < numCols; col++)
 {
 const uint32_t cuAddr = lineStartCUAddr + col;
@@ -1067,24 +1068,42 @@
 codeRow.m_entropyCoder.resetEntropy();
 
 TEncSbac *bufSbac = (m_cfg->param.bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
-if (m_cfg->param.rc.aqMode)
+
+if ((uint32_t)row >= col && (row !=0))
+qpBase = m_pic->getCU(cuAddr - numCols + 1)->m_baseQp;
+
+if (m_cfg->param.rc.aqMode || (m_cfg->param.rc.vbvBufferSize >0 && m_cfg->param.rc.vbvMaxBitrate >0))
 {
-int qp = calcQpForCu(m_pic, cuAddr);
+int qp = calcQpForCu(m_pic, cuAddr , qpBase);
 setLambda(qp, row);
-if (qp > MAX_QP)
-qp = MAX_QP;
-cu->setQP(0, (char)qp);
+qp = X265_MIN(qp, MAX_QP);
+cu->setQP(0,char(qp));
+cu->m_baseQp = qpBase;
 }
 codeRow.processCU(cu, m_pic->getSlice(), bufSbac, m_cfg->param.bEnableWavefront && col == 1);
 
-// TODO: Keep atomic running totals for rate control?
-// cu->m_totalBits;
-// cu->m_totalCost;
-// cu->m_totalDistortion;
+if (m_cfg->param.rc.vbvBufferSize && m_cfg->param.rc.vbvMaxBitrate)
+{
+// Update encoded bits, satdCost, baseQP for each CU
+m_pic->m_rowDiagSatd[row] += m_pic->m_cuCostsForVbv[cuAddr];
+m_pic->m_rowEncodedBits[row] += cu->m_totalBits;
+m_pic->m_numEncodedCusPerRow[row] = cuAddr;
+m_pic->m_qpaAq[row] += cu->getQP(0);
+m_pic->m_qpaRc[row] += cu->m_baseQp;
+
+if ((uint32_t)row == col)
+m_pic->m_rowDiagQp[row] = qpBase;
+
+// If current block is at row diagonal checkpoint, call vbv ratecontrol.
+if ((uint32_t)row == col && row != 0 )
+{
+ m_top->m_rateControl->rowDiagonalVbvRateControl(m_pic, row, &m_rce, qpBase);
+ qpBase = Clip3((double)MIN_QP, (double)MAX_MAX_QP, qpBase);
+}
+}
 
 // Completed CU processing
 m_rows[row].m_completed++;
-
 if (m_rows[row].m_completed >= 2 && row < m_numRows - 1)
 {
 ScopedLock below(m_rows[row + 1].m_lock);
@@ -1128,34 +1147,43 @@
 curRow.m_busy = false;
 }
 
-int FrameEncoder::calcQpForCu(TComPic *pic, uint32_t cuAddr)
+int FrameEncoder::calcQpForCu(TComPic *pic, uint32_t cuAddr, double baseQp)
 {
 x265_emms();
-double qp = pic->getSlice()->m_avgQpRc;
-if (m_cfg->param.rc.aqMode)
+double qp = baseQp;
+
+/* Derive qpOffet for each CU by averaging offsets for all 16x16 blocks in the cu. */
+double qp_offset = 0;
+int maxBlockCols = (pic->getPicYuvOrg()->getWidth() + (16 - 1)) / 16;
+int maxBlockRows = (pic->getPicYuvOrg()->getHeight() + (16 - 1)) / 16;
+int noOfBlocks = g_maxCUWidth / 16;
+int block_y = (cuAddr / pic->getPicSym()->getFrameWidthInCU()) * noOfBlocks;
+int block_x = (cuAddr * noOfBlocks) - block_y * pic->getPicSym()->getFrameWidthInCU();
+
+double *qpoffs = (pic->getSlice()->isReferenced() && m_cfg->param.rc.cuTree) ? pic->m_lowres.qpOffset : pic->m_lowres.qpAqOffset;
+int cnt = 0, idx =0;
+for (int h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)
 {
-/* Derive qpOffet for each CU by averaging offsets for all 16x16 blocks in the cu. */
-double qp_offset = 0;
-int maxBlockCols = (pic->getPicYuvOrg()->getWidth() + (16 - 1)) / 16;
-int maxBlockRows = (pic->getPicYuvOrg()->getHeight() + (16 - 1)) / 16;
-int noOfBlocks = g_maxCUWidth / 16;
-int block_y = (cuAddr / pic->getPicSym()->getFrameWidthInCU()) * noOfBlocks;
-int block_x = (cuAddr * noOfBlocks) - block_y * pic->getPicSym()->getFrameWidthInCU();
+for (int w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++)
+{
+idx = block_x + w + (block_y * maxBlockCols);
+if (m_cfg->param.rc.aqMode)
+qp_offset += qpoffs[idx];
 
-double *qpoffs = (pic->getSlice()->isReferenced() && m_cfg->param.rc.cuTree) ? pic->m_lowres.qpOffset : pic->m_lowres.qpAqOffset;
-int cnt = 0;
-for (int h = 0; h  noOfBlocks  

[x265] [PATCH 6 of 6] vbv: integrate row level vbv ratecontrol at each major row diagonal

2014-02-20 Thread aarthi
# HG changeset patch
# User Aarthi Thirumalai
# Date 1392901254 -19800
#  Thu Feb 20 18:30:54 2014 +0530
# Node ID 72f607f2dc765007149c1d933ec18154f513c5e7
# Parent  22d4811e0676fbec5e8bf96d99be9b98020fc89f
vbv: integrate row level vbv ratecontrol at each major row diagonal.

diff -r 22d4811e0676 -r 72f607f2dc76 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Fri Feb 21 11:35:48 2014 +0530
+++ b/source/encoder/frameencoder.cpp   Thu Feb 20 18:30:54 2014 +0530
@@ -1057,6 +1057,7 @@
 CTURow& codeRow = m_rows[m_cfg->param.bEnableWavefront ? row : 0];
 const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
 const uint32_t lineStartCUAddr = row * numCols;
+double qpBase = m_pic->m_avgQpRc;
 for (uint32_t col = curRow.m_completed; col < numCols; col++)
 {
 const uint32_t cuAddr = lineStartCUAddr + col;
@@ -1065,26 +1066,41 @@
 
 codeRow.m_entropyCoder.setEntropyCoder(&m_sbacCoder, m_pic->getSlice());
 codeRow.m_entropyCoder.resetEntropy();
+TEncSbac *bufSbac = (m_cfg->param.bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
 
-TEncSbac *bufSbac = (m_cfg->param.bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
-if (m_cfg->param.rc.aqMode)
+if ((uint32_t)row >= col && (row !=0))
+qpBase = m_pic->getCU(cuAddr - numCols + 1)->m_baseQp;
+
+if (m_cfg->param.rc.aqMode || (m_cfg->param.rc.vbvBufferSize >0 && m_cfg->param.rc.vbvMaxBitrate >0))
 {
-int qp = calcQpForCu(m_pic, cuAddr);
+int qp = calcQpForCu(m_pic, cuAddr , qpBase);
 setLambda(qp, row);
-if (qp > MAX_QP)
-qp = MAX_QP;
-cu->setQP(0, (char)qp);
+qp = X265_MIN(qp, MAX_QP);
+cu->setQP(0,char(qp));
+cu->m_baseQp = qpBase;
 }
 codeRow.processCU(cu, m_pic->getSlice(), bufSbac, m_cfg->param.bEnableWavefront && col == 1);
+if (m_cfg->param.rc.vbvBufferSize && m_cfg->param.rc.vbvMaxBitrate)
+{
+// Update encoded bits, satdCost, baseQP for each CU
+m_pic->m_rowDiagSatd[row] += m_pic->m_cuCostsForVbv[cuAddr];
+m_pic->m_rowEncodedBits[row] += cu->m_totalBits;
+m_pic->m_numEncodedCusPerRow[row] = cuAddr;
+m_pic->m_qpaAq[row] += cu->getQP(0);
+m_pic->m_qpaRc[row] += cu->m_baseQp;
 
-// TODO: Keep atomic running totals for rate control?
-// cu->m_totalBits;
-// cu->m_totalCost;
-// cu->m_totalDistortion;
+if ((uint32_t)row == col)
+m_pic->m_rowDiagQp[row] = qpBase;
 
+// If current block is at row diagonal checkpoint, call vbv ratecontrol.
+if ((uint32_t)row == col && row != 0 )
+{
+ m_top->m_rateControl->rowDiagonalVbvRateControl(m_pic, row, &m_rce, qpBase);
+ qpBase = Clip3((double)MIN_QP, (double)MAX_MAX_QP, qpBase);
+}
+}
 // Completed CU processing
 m_rows[row].m_completed++;
-
 if (m_rows[row].m_completed >= 2 && row < m_numRows - 1)
 {
 ScopedLock below(m_rows[row + 1].m_lock);
@@ -1127,38 +1143,43 @@
 m_totalTime = m_totalTime + (x265_mdate() - startTime);
 curRow.m_busy = false;
 }
-
-int FrameEncoder::calcQpForCu(TComPic *pic, uint32_t cuAddr)
+int FrameEncoder::calcQpForCu(TComPic *pic, uint32_t cuAddr, double baseQp)
 {
 x265_emms();
-double qp = pic->getSlice()->m_avgQpRc;
-if (m_cfg->param.rc.aqMode)
+double qp = baseQp;
+
+/* Derive qpOffet for each CU by averaging offsets for all 16x16 blocks in the cu. */
+double qp_offset = 0;
+int maxBlockCols = (pic->getPicYuvOrg()->getWidth() + (16 - 1)) / 16;
+int maxBlockRows = (pic->getPicYuvOrg()->getHeight() + (16 - 1)) / 16;
+int noOfBlocks = g_maxCUWidth / 16;
+int block_y = (cuAddr / pic->getPicSym()->getFrameWidthInCU()) * noOfBlocks;
+int block_x = (cuAddr * noOfBlocks) - block_y * pic->getPicSym()->getFrameWidthInCU();
+
+double *qpoffs = (pic->getSlice()->isReferenced() && m_cfg->param.rc.cuTree) ? pic->m_lowres.qpOffset : pic->m_lowres.qpAqOffset;
+int cnt = 0, idx =0;
+for (int h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)
 {
-/* Derive qpOffet for each CU by averaging offsets for all 16x16 blocks in the cu. */
-double qp_offset = 0;
-int maxBlockCols = (pic->getPicYuvOrg()->getWidth() + (16 - 1)) / 16;
-int maxBlockRows = (pic->getPicYuvOrg()->getHeight() + (16 - 1)) / 16;
-int noOfBlocks = g_maxCUWidth / 16;
-int block_y = (cuAddr / pic->getPicSym()->getFrameWidthInCU()) * noOfBlocks;
-int block_x = (cuAddr * noOfBlocks) - block_y * pic->getPicSym()->getFrameWidthInCU();
+for (int w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++)
+{
+idx = block_x + w + (block_y * maxBlockCols);
+if