# HG changeset patch
# User Bhavna Hariharan <bha...@multicorewareinc.com>
# Date 1523528451 -19800
#      Thu Apr 12 15:50:51 2018 +0530
# Node ID bff8e6d4407bebd8ff19be8323deacc16be4875c
# Parent  04a337abd70de269cef7d9655365f3a3ebde02aa
dynamic-refine: Remove lock while collecting CTU data

Locks were used to avoid race conditions while copying data from CTU level to
frame level. The data is now collected per row, and once the entire frame
completes analysis the row data is copied to the frame. This eliminates the
possibility of a race condition without having to employ locks.

diff -r 04a337abd70d -r bff8e6d4407b source/common/common.h
--- a/source/common/common.h    Thu Apr 12 15:10:59 2018 +0530
+++ b/source/common/common.h    Thu Apr 12 15:50:51 2018 +0530
@@ -332,6 +332,8 @@
 #define START_CODE_OVERHEAD 3 
 #define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)
 
+#define MAX_NUM_DYN_REFINE          ((NUM_CU_DEPTH - 1) * X265_REFINE_INTER_LEVELS)
+
 namespace X265_NS {
 
 enum { SAO_NUM_OFFSET = 4 };
diff -r 04a337abd70d -r bff8e6d4407b source/common/framedata.h
--- a/source/common/framedata.h Thu Apr 12 15:10:59 2018 +0530
+++ b/source/common/framedata.h Thu Apr 12 15:50:51 2018 +0530
@@ -88,6 +88,11 @@
     uint64_t    cntInterPu[NUM_CU_DEPTH][INTER_MODES - 1];
     uint64_t    cntMergePu[NUM_CU_DEPTH][INTER_MODES - 1];
 
+    /* Feature values per row for dynamic refinement */
+    uint64_t       rowRdDyn[MAX_NUM_DYN_REFINE];
+    uint32_t       rowVarDyn[MAX_NUM_DYN_REFINE];
+    uint32_t       rowCntDyn[MAX_NUM_DYN_REFINE];
+
     FrameStats()
     {
         memset(this, 0, sizeof(FrameStats));
diff -r 04a337abd70d -r bff8e6d4407b source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Thu Apr 12 15:10:59 2018 +0530
+++ b/source/encoder/frameencoder.cpp   Thu Apr 12 15:50:51 2018 +0530
@@ -935,6 +935,9 @@
         }
     } // end of (m_param->maxSlices > 1)
 
+    if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder) //Avoid collecting data that will not be used by future frames.
+        collectDynDataFrame();
+
     if (m_param->rc.bStatWrite)
     {
         int totalI = 0, totalP = 0, totalSkip = 0;
@@ -1473,27 +1476,13 @@
 
         // Does all the CU analysis, returns best top level mode decision
         Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
-        if (m_param->bDynamicRefine)
+
+        /* startPoint > encodeOrder is true when the start point changes for
+        a new GOP while frames of the previous GOP are still incomplete.
+        The data from these frames will not be used by any future frames. */
+        if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder)
         {
-            if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid collecting data that will not be used by future frames.
-            {
-                ScopedLock dynLock(m_top->m_dynamicRefineLock);
-                for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
-                {
-                    for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
-                    {
-                        int offset = (depth * X265_REFINE_INTER_LEVELS) + i;
-                        int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;
-                        int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
-                        if (ctu->m_collectCUCount[offset])
-                        {
-                            m_top->m_variance[index] += ctu->m_collectCUVariance[offset];
-                            m_top->m_rdCost[index] += ctu->m_collectCURd[offset];
-                            m_top->m_trainingCount[index] += ctu->m_collectCUCount[offset];
-                        }
-                    }
-                }
-            }
+            collectDynDataRow(*ctu, &curRow.rowStats);
             X265_FREE_ZERO(ctu->m_collectCUVariance);
             X265_FREE_ZERO(ctu->m_collectCURd);
             X265_FREE_ZERO(ctu->m_collectCUCount);
@@ -1880,6 +1869,46 @@
     if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows)
         m_completionEvent.trigger();
 }
+
+void FrameEncoder::collectDynDataRow(const CUData& ctu, FrameStats* rowStats)
+{
+    for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
+    {
+        for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
+        {
+            int offset = (depth * X265_REFINE_INTER_LEVELS) + i;
+            if (ctu.m_collectCUCount[offset])
+            {
+                rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[offset];
+                rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset];
+                rowStats->rowCntDyn[offset] += ctu.m_collectCUCount[offset];
+            }
+        }
+    }
+}
+
+void FrameEncoder::collectDynDataFrame()
+{
+    for (uint32_t row = 0; row < m_numRows; row++)
+    {
+        for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; refLevel++)
+        {
+            for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
+            {
+                int offset = (depth * X265_REFINE_INTER_LEVELS) + refLevel;
+                int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;
+                int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
+                if (m_rows[row].rowStats.rowCntDyn[offset])
+                {
+                    m_top->m_variance[index] += m_rows[row].rowStats.rowVarDyn[offset];
+                    m_top->m_rdCost[index] += m_rows[row].rowStats.rowRdDyn[offset];
+                    m_top->m_trainingCount[index] += m_rows[row].rowStats.rowCntDyn[offset];
+                }
+            }
+        }
+    }
+}
+
 void FrameEncoder::computeAvgTrainingData()
 {
     if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe)
diff -r 04a337abd70d -r bff8e6d4407b source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h     Thu Apr 12 15:10:59 2018 +0530
+++ b/source/encoder/frameencoder.h     Thu Apr 12 15:50:51 2018 +0530
@@ -240,6 +240,9 @@
     void enqueueRowFilter(int row)  { WaveFront::enqueueRow(row * 2 + 1); }
     void enableRowEncoder(int row)  { WaveFront::enableRow(row * 2 + 0); }
     void enableRowFilter(int row)   { WaveFront::enableRow(row * 2 + 1); }
+
+    void collectDynDataRow(const CUData& ctu, FrameStats* rowStats);
+    void collectDynDataFrame();
 };
 }
 
# HG changeset patch
# User Bhavna Hariharan <bha...@multicorewareinc.com>
# Date 1523528451 -19800
#      Thu Apr 12 15:50:51 2018 +0530
# Node ID bff8e6d4407bebd8ff19be8323deacc16be4875c
# Parent  04a337abd70de269cef7d9655365f3a3ebde02aa
dynamic-refine: Remove lock while collecting CTU data

Locks were used to avoid race conditions while copying data from CTU level to
frame level. The data is now collected per row, and once the entire frame
completes analysis the row data is copied to the frame. This eliminates the
possibility of a race condition without having to employ locks.

diff -r 04a337abd70d -r bff8e6d4407b source/common/common.h
--- a/source/common/common.h	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/common/common.h	Thu Apr 12 15:50:51 2018 +0530
@@ -332,6 +332,8 @@
 #define START_CODE_OVERHEAD 3 
 #define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)
 
+#define MAX_NUM_DYN_REFINE          ((NUM_CU_DEPTH - 1) * X265_REFINE_INTER_LEVELS)
+
 namespace X265_NS {
 
 enum { SAO_NUM_OFFSET = 4 };
diff -r 04a337abd70d -r bff8e6d4407b source/common/framedata.h
--- a/source/common/framedata.h	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/common/framedata.h	Thu Apr 12 15:50:51 2018 +0530
@@ -88,6 +88,11 @@
     uint64_t    cntInterPu[NUM_CU_DEPTH][INTER_MODES - 1];
     uint64_t    cntMergePu[NUM_CU_DEPTH][INTER_MODES - 1];
 
+    /* Feature values per row for dynamic refinement */
+    uint64_t       rowRdDyn[MAX_NUM_DYN_REFINE];
+    uint32_t       rowVarDyn[MAX_NUM_DYN_REFINE];
+    uint32_t       rowCntDyn[MAX_NUM_DYN_REFINE];
+
     FrameStats()
     {
         memset(this, 0, sizeof(FrameStats));
diff -r 04a337abd70d -r bff8e6d4407b source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/encoder/frameencoder.cpp	Thu Apr 12 15:50:51 2018 +0530
@@ -935,6 +935,9 @@
         }
     } // end of (m_param->maxSlices > 1)
 
+    if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder) //Avoid collecting data that will not be used by future frames.
+        collectDynDataFrame();
+
     if (m_param->rc.bStatWrite)
     {
         int totalI = 0, totalP = 0, totalSkip = 0;
@@ -1473,27 +1476,13 @@
 
         // Does all the CU analysis, returns best top level mode decision
         Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
-        if (m_param->bDynamicRefine)
+
+        /* startPoint > encodeOrder is true when the start point changes for
+        a new GOP while frames of the previous GOP are still incomplete.
+        The data from these frames will not be used by any future frames. */
+        if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder)
         {
-            if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid collecting data that will not be used by future frames.
-            {
-                ScopedLock dynLock(m_top->m_dynamicRefineLock);
-                for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
-                {
-                    for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
-                    {
-                        int offset = (depth * X265_REFINE_INTER_LEVELS) + i;
-                        int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;
-                        int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
-                        if (ctu->m_collectCUCount[offset])
-                        {
-                            m_top->m_variance[index] += ctu->m_collectCUVariance[offset];
-                            m_top->m_rdCost[index] += ctu->m_collectCURd[offset];
-                            m_top->m_trainingCount[index] += ctu->m_collectCUCount[offset];
-                        }
-                    }
-                }
-            }
+            collectDynDataRow(*ctu, &curRow.rowStats);
             X265_FREE_ZERO(ctu->m_collectCUVariance);
             X265_FREE_ZERO(ctu->m_collectCURd);
             X265_FREE_ZERO(ctu->m_collectCUCount);
@@ -1880,6 +1869,46 @@
     if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows)
         m_completionEvent.trigger();
 }
+
+void FrameEncoder::collectDynDataRow(const CUData& ctu, FrameStats* rowStats)
+{
+    for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
+    {
+        for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
+        {
+            int offset = (depth * X265_REFINE_INTER_LEVELS) + i;
+            if (ctu.m_collectCUCount[offset])
+            {
+                rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[offset];
+                rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset];
+                rowStats->rowCntDyn[offset] += ctu.m_collectCUCount[offset];
+            }
+        }
+    }
+}
+
+void FrameEncoder::collectDynDataFrame()
+{
+    for (uint32_t row = 0; row < m_numRows; row++)
+    {
+        for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; refLevel++)
+        {
+            for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
+            {
+                int offset = (depth * X265_REFINE_INTER_LEVELS) + refLevel;
+                int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;
+                int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
+                if (m_rows[row].rowStats.rowCntDyn[offset])
+                {
+                    m_top->m_variance[index] += m_rows[row].rowStats.rowVarDyn[offset];
+                    m_top->m_rdCost[index] += m_rows[row].rowStats.rowRdDyn[offset];
+                    m_top->m_trainingCount[index] += m_rows[row].rowStats.rowCntDyn[offset];
+                }
+            }
+        }
+    }
+}
+
 void FrameEncoder::computeAvgTrainingData()
 {
     if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe)
diff -r 04a337abd70d -r bff8e6d4407b source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/encoder/frameencoder.h	Thu Apr 12 15:50:51 2018 +0530
@@ -240,6 +240,9 @@
     void enqueueRowFilter(int row)  { WaveFront::enqueueRow(row * 2 + 1); }
     void enableRowEncoder(int row)  { WaveFront::enableRow(row * 2 + 0); }
     void enableRowFilter(int row)   { WaveFront::enableRow(row * 2 + 1); }
+
+    void collectDynDataRow(const CUData& ctu, FrameStats* rowStats);
+    void collectDynDataFrame();
 };
 }
 
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to