# HG changeset patch
# User Steve Borho <[email protected]>
# Date 1425584683 21600
# Thu Mar 05 13:44:43 2015 -0600
# Node ID 820dcc3216a55965b4f763dcc4ed4cf2244d4de7
# Parent e6b519dfbf812f0ba392a70ce651bf589d2ab82a
frameencoder: use a bonded worker thread to perform weight analysis, add stat
Weight analysis can take a substantial amount of time. It is best to use a
worker thread so the frame encoder thread can stay blocked during all of this
processing (we want worker threads to use the cores, not the frame encoders).
Weight analysis can be 1% of the total elapsed encoder time.
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/encoder.cpp Thu Mar 05 13:44:43 2015 -0600
@@ -848,7 +848,8 @@
int64_t lookaheadWorkerTime = m_lookahead->m_slicetypeDecideElapsedTime +
m_lookahead->m_preLookaheadElapsedTime +
batchElapsedTime + coopSliceElapsedTime;
- int64_t totalWorkerTime = cuStats.totalCTUTime +
cuStats.loopFilterElapsedTime + cuStats.pmodeTime + cuStats.pmeTime +
lookaheadWorkerTime;
+ int64_t totalWorkerTime = cuStats.totalCTUTime +
cuStats.loopFilterElapsedTime + cuStats.pmodeTime +
+ cuStats.pmeTime + lookaheadWorkerTime +
cuStats.weightAnalyzeTime;
int64_t elapsedEncodeTime = x265_mdate() - m_encodeStartTime;
int64_t interRDOTotalTime = 0, intraRDOTotalTime = 0;
@@ -898,6 +899,12 @@
x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in loop
filters, average %.3lf ms per call\n",
100.0 * cuStats.loopFilterElapsedTime / totalWorkerTime,
ELAPSED_MSEC(cuStats.loopFilterElapsedTime) /
cuStats.countLoopFilter);
+ if (cuStats.countWeightAnalyze && cuStats.weightAnalyzeTime)
+ {
+ x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in weight
analysis, average %.3lf ms per call\n",
+ 100.0 * cuStats.weightAnalyzeTime / totalWorkerTime,
+ ELAPSED_MSEC(cuStats.weightAnalyzeTime) /
cuStats.countWeightAnalyze);
+ }
if (m_param->bDistributeModeAnalysis && cuStats.countPModeMasters)
{
x265_log(m_param, X265_LOG_INFO, "CU: %.3lf PMODE masters per CTU,
each blocked an average of %.3lf ns\n",
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/frameencoder.cpp Thu Mar 05 13:44:43 2015 -0600
@@ -284,6 +284,12 @@
}
}
+void FrameEncoder::WeightAnalysis::processTasks(int /* workerThreadId */) // task body of the WeightAnalysis group: runs weight analysis for the master's frame; the worker id is unused
+{
+ Frame* frame = master.m_frame; // frame taken from the owning FrameEncoder (master)
+ weightAnalyse(*frame->m_encData->m_slice, *frame, *master.m_param); // same weightAnalyse() routine compressFrame() calls directly when no worker can be bonded
+}
+
void FrameEncoder::compressFrame()
{
ProfileScopeEvent(frameThread);
@@ -316,7 +322,18 @@
bool bUseWeightP = slice->m_sliceType == P_SLICE &&
slice->m_pps->bUseWeightPred;
bool bUseWeightB = slice->m_sliceType == B_SLICE &&
slice->m_pps->bUseWeightedBiPred;
if (bUseWeightP || bUseWeightB)
- weightAnalyse(*slice, *m_frame, *m_param);
+ {
+#if DETAILED_CU_STATS
+ m_cuStats.countWeightAnalyze++;
+ ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);
+#endif
+ WeightAnalysis wa(*this);
+ if (m_pool && wa.tryBondPeers(*this, 1))
+ /* use an idle worker for weight analysis */
+ wa.waitForExit();
+ else
+ weightAnalyse(*slice, *m_frame, *m_param);
+ }
else
slice->disableWeights();
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/frameencoder.h Thu Mar 05 13:44:43 2015 -0600
@@ -200,6 +200,21 @@
FrameFilter m_frameFilter;
NALList m_nalList;
+ class WeightAnalysis : public BondedTaskGroup // task group whose processTasks() runs weightAnalyse() on the master's frame
+ {
+ public:
+
+ FrameEncoder& master; // frame encoder whose m_frame and m_param are read by processTasks()
+
+ WeightAnalysis(FrameEncoder& fe) : master(fe) {}
+
+ void processTasks(int workerThreadId);
+
+ protected:
+
+ WeightAnalysis& operator=(const WeightAnalysis&); // non-public and returns a reference (canonical form); the reference member cannot be reseated, so instances are not meant to be assigned
+ };
+
protected:
bool initializeGeoms();
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/search.h
--- a/source/encoder/search.h Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/search.h Thu Mar 05 13:44:43 2015 -0600
@@ -152,6 +152,7 @@
int64_t pmeBlockTime; // elapsed worker time blocked
for pme batch completion
int64_t pmodeTime; // elapsed worker time
processing pmode slave jobs
int64_t pmodeBlockTime; // elapsed worker time blocked
for pmode batch completion
+ int64_t weightAnalyzeTime; // elapsed worker time
analyzing reference weights
int64_t totalCTUTime; // elapsed worker time in
compressCTU (includes pmode master)
uint64_t countIntraRDO[NUM_CU_DEPTH];
@@ -163,6 +164,7 @@
uint64_t countPMEMasters;
uint64_t countPModeTasks;
uint64_t countPModeMasters;
+ uint64_t countWeightAnalyze;
uint64_t totalCTUs;
CUStats() { clear(); }
@@ -189,6 +191,7 @@
pmeBlockTime += other.pmeBlockTime;
pmodeTime += other.pmodeTime;
pmodeBlockTime += other.pmodeBlockTime;
+ weightAnalyzeTime += other.weightAnalyzeTime;
totalCTUTime += other.totalCTUTime;
countIntraAnalysis += other.countIntraAnalysis;
@@ -198,6 +201,7 @@
countPMEMasters += other.countPMEMasters;
countPModeTasks += other.countPModeTasks;
countPModeMasters += other.countPModeMasters;
+ countWeightAnalyze += other.countWeightAnalyze;
totalCTUs += other.totalCTUs;
other.clear();
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel