# HG changeset patch
# User Steve Borho <[email protected]>
# Date 1425584683 21600
#      Thu Mar 05 13:44:43 2015 -0600
# Node ID 820dcc3216a55965b4f763dcc4ed4cf2244d4de7
# Parent  e6b519dfbf812f0ba392a70ce651bf589d2ab82a
frameencoder: use a bonded worker thread to perform weight analysis, add stat

Weight analysis can take a substantial amount of time. It is best to run it on
a worker thread so that the frame encoder thread stays blocked during all of
this processing (we want worker threads to use the cores, not the frame
encoders).

Weight analysis can account for 1% of the total elapsed encoder time.

diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp        Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/encoder.cpp        Thu Mar 05 13:44:43 2015 -0600
@@ -848,7 +848,8 @@
     int64_t lookaheadWorkerTime = m_lookahead->m_slicetypeDecideElapsedTime + 
m_lookahead->m_preLookaheadElapsedTime +
                                   batchElapsedTime + coopSliceElapsedTime;
 
-    int64_t totalWorkerTime = cuStats.totalCTUTime + 
cuStats.loopFilterElapsedTime + cuStats.pmodeTime + cuStats.pmeTime + 
lookaheadWorkerTime;
+    int64_t totalWorkerTime = cuStats.totalCTUTime + 
cuStats.loopFilterElapsedTime + cuStats.pmodeTime +
+                              cuStats.pmeTime + lookaheadWorkerTime + 
cuStats.weightAnalyzeTime;
     int64_t elapsedEncodeTime = x265_mdate() - m_encodeStartTime;
 
     int64_t interRDOTotalTime = 0, intraRDOTotalTime = 0;
@@ -898,6 +899,12 @@
     x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in loop 
filters, average %.3lf ms per call\n",
              100.0 * cuStats.loopFilterElapsedTime / totalWorkerTime,
              ELAPSED_MSEC(cuStats.loopFilterElapsedTime) / 
cuStats.countLoopFilter);
+    if (cuStats.countWeightAnalyze && cuStats.weightAnalyzeTime)
+    {
+        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in weight 
analysis, average %.3lf ms per call\n",
+                 100.0 * cuStats.weightAnalyzeTime / totalWorkerTime,
+                 ELAPSED_MSEC(cuStats.weightAnalyzeTime) / 
cuStats.countWeightAnalyze);
+    }
     if (m_param->bDistributeModeAnalysis && cuStats.countPModeMasters)
     {
         x265_log(m_param, X265_LOG_INFO, "CU: %.3lf PMODE masters per CTU, 
each blocked an average of %.3lf ns\n",
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/frameencoder.cpp   Thu Mar 05 13:44:43 2015 -0600
@@ -284,6 +284,12 @@
     }
 }
 
+void FrameEncoder::WeightAnalysis::processTasks(int /* workerThreadId */)
+{
+    Frame* frame = master.m_frame;
+    weightAnalyse(*frame->m_encData->m_slice, *frame, *master.m_param);
+}
+
 void FrameEncoder::compressFrame()
 {
     ProfileScopeEvent(frameThread);
@@ -316,7 +322,18 @@
     bool bUseWeightP = slice->m_sliceType == P_SLICE && 
slice->m_pps->bUseWeightPred;
     bool bUseWeightB = slice->m_sliceType == B_SLICE && 
slice->m_pps->bUseWeightedBiPred;
     if (bUseWeightP || bUseWeightB)
-        weightAnalyse(*slice, *m_frame, *m_param);
+    {
+#if DETAILED_CU_STATS
+        m_cuStats.countWeightAnalyze++;
+        ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);
+#endif
+        WeightAnalysis wa(*this);
+        if (m_pool && wa.tryBondPeers(*this, 1))
+            /* use an idle worker for weight analysis */
+            wa.waitForExit();
+        else
+            weightAnalyse(*slice, *m_frame, *m_param);
+    }
     else
         slice->disableWeights();
 
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h     Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/frameencoder.h     Thu Mar 05 13:44:43 2015 -0600
@@ -200,6 +200,21 @@
     FrameFilter              m_frameFilter;
     NALList                  m_nalList;
 
+    class WeightAnalysis : public BondedTaskGroup
+    {
+    public:
+
+        FrameEncoder& master;
+
+        WeightAnalysis(FrameEncoder& fe) : master(fe) {}
+
+        void processTasks(int workerThreadId);
+
+    protected:
+
+        WeightAnalysis operator=(const WeightAnalysis&);
+    };
+
 protected:
 
     bool initializeGeoms();
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/search.h
--- a/source/encoder/search.h   Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/search.h   Thu Mar 05 13:44:43 2015 -0600
@@ -152,6 +152,7 @@
     int64_t  pmeBlockTime;                      // elapsed worker time blocked 
for pme batch completion
     int64_t  pmodeTime;                         // elapsed worker time 
processing pmode slave jobs
     int64_t  pmodeBlockTime;                    // elapsed worker time blocked 
for pmode batch completion
+    int64_t  weightAnalyzeTime;                 // elapsed worker time 
analyzing reference weights
     int64_t  totalCTUTime;                      // elapsed worker time in 
compressCTU (includes pmode master)
 
     uint64_t countIntraRDO[NUM_CU_DEPTH];
@@ -163,6 +164,7 @@
     uint64_t countPMEMasters;
     uint64_t countPModeTasks;
     uint64_t countPModeMasters;
+    uint64_t countWeightAnalyze;
     uint64_t totalCTUs;
 
     CUStats() { clear(); }
@@ -189,6 +191,7 @@
         pmeBlockTime += other.pmeBlockTime;
         pmodeTime += other.pmodeTime;
         pmodeBlockTime += other.pmodeBlockTime;
+        weightAnalyzeTime += other.weightAnalyzeTime;
         totalCTUTime += other.totalCTUTime;
 
         countIntraAnalysis += other.countIntraAnalysis;
@@ -198,6 +201,7 @@
         countPMEMasters += other.countPMEMasters;
         countPModeTasks += other.countPModeTasks;
         countPModeMasters += other.countPModeMasters;
+        countWeightAnalyze += other.countWeightAnalyze;
         totalCTUs += other.totalCTUs;
 
         other.clear();
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to