Hi, I am working on the review comments on this patch series and will send the updated patches soon. Stay tuned!
Regards, *Pooja Venkatesan*, Video Codec Engineer, Media & AI analytics BU On Thu, Jun 25, 2020 at 9:00 PM Pooja Venkatesan <po...@multicorewareinc.com> wrote: > From 2777c2e3389eaf556f3420bc0717171bbcf97e52 Mon Sep 17 00:00:00 2001 > From: Pooja Venkatesan <po...@multicorewareinc.com> > Date: Thu, 25 Jun 2020 20:42:50 +0530 > Subject: [PATCH] Improvements to hist-based scenecut algorithm. > > This patch does the following: > 1. Add min and max threshold intervals to detect scenecuts. > 2. For those within the range, > Compare colour and edge histogram along with inter and intra satdcosts > to detect scenecuts. > 3. Handle scene transitions. > 4. Change default value of hist-threshold to 0.03 > --- > doc/reST/cli.rst | 7 +-- > source/common/lowres.cpp | 2 + > source/common/lowres.h | 5 ++ > source/common/param.cpp | 2 +- > source/encoder/encoder.cpp | 25 ++++++++-- > source/encoder/encoder.h | 2 +- > source/encoder/slicetype.cpp | 88 +++++++++++++++++++++++++++--------- > source/x265.h | 2 +- > 8 files changed, 101 insertions(+), 32 deletions(-) > > diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst > index b9d795ace..23b74c3d8 100644 > --- a/doc/reST/cli.rst > +++ b/doc/reST/cli.rst > @@ -1468,9 +1468,10 @@ Slice decision options > .. option:: --hist-threshold <0.0..1.0> > > This value represents the threshold for normalized SAD of edge > histograms used in scenecut detection. > - This requires :option:`--hist-scenecut` to be enabled. For example, a > value of 0.2 indicates that a frame with normalized SAD value > - greater than 0.2 against the previous frame as scenecut. > - Default 0.01. > + This requires :option:`--hist-scenecut` to be enabled. For example, a > value of 0.2 indicates that a frame with normalized SAD value > + greater than 0.2 against the previous frame as scenecut. > + Increasing the threshold reduces the number of scenecuts detected. > + Default 0.03. > > .. 
option:: --radl <integer> > > diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp > index e8dd991bc..8e19ac17c 100644 > --- a/source/common/lowres.cpp > +++ b/source/common/lowres.cpp > @@ -266,6 +266,8 @@ void Lowres::init(PicYuv *origPic, int poc) > indB = 0; > memset(costEst, -1, sizeof(costEst)); > memset(weightedCostDelta, 0, sizeof(weightedCostDelta)); > + interPCostPercDiff = 0.0; > + intraCostPercDiff = 0.0; > > if (qpAqOffset && invQscaleFactor) > memset(costEstAq, -1, sizeof(costEstAq)); > diff --git a/source/common/lowres.h b/source/common/lowres.h > index 5c50fad67..200b1f032 100644 > --- a/source/common/lowres.h > +++ b/source/common/lowres.h > @@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes > uint16_t* propagateCost; > double weightedCostDelta[X265_BFRAME_MAX + 2]; > ReferencePlanes weightedRef[X265_BFRAME_MAX + 2]; > + /* For hist-based scenecut */ > + bool m_bIsMaxThres; > + double interPCostPercDiff; > + double intraCostPercDiff; > + > bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize); > void destroy(); > void init(PicYuv *origPic, int poc); > diff --git a/source/common/param.cpp b/source/common/param.cpp > index 925f0c460..8c0498efc 100644 > --- a/source/common/param.cpp > +++ b/source/common/param.cpp > @@ -168,7 +168,7 @@ void x265_param_default(x265_param* param) > param->bFrameAdaptive = X265_B_ADAPT_TRELLIS; > param->bBPyramid = 1; > param->scenecutThreshold = 40; /* Magic number pulled in from x264 */ > - param->edgeTransitionThreshold = 0.01; > + param->edgeTransitionThreshold = 0.03; > param->bHistBasedSceneCut = 0; > param->lookaheadSlices = 8; > param->lookaheadThreads = 0; > diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp > index f6bc5408d..bec7ff5c0 100644 > --- a/source/encoder/encoder.cpp > +++ b/source/encoder/encoder.cpp > @@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t value, > int32_t minValue, int32_t maxValue > return (double)(value - minValue) * 
(rangeEnd - rangeStart) / > (maxValue - minValue) + rangeStart; > } > > -void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double > maxUVSad, double edgeSad) > +void Encoder::findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup, > double maxUVSad, double edgeSad) > { > + double minEdgeT = m_edgeHistThreshold * 0.5; > + double minChromaT = minEdgeT * 10.0; > + double maxEdgeT = m_edgeHistThreshold * 1.5; > + double maxChromaT = maxEdgeT * 10.0; > pic->frameData.bScenecut = false; > > if (pic->poc == 0) > @@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture *pic, > bool& bDup, double maxUVSad, doub > { > bDup = true; > } > - else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >= > m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >= > m_chromaHistThreshold)) > + else if (edgeSad < minEdgeT && maxUVSad < minChromaT) > + { > + pic->frameData.bScenecut = false; > + } > + else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT) > + { > + pic->frameData.bScenecut = true; > + isMax = true; > + } > + else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >= > m_scaledChromaThreshold > + || (edgeSad > m_edgeHistThreshold && maxUVSad >= > m_chromaHistThreshold)) > { > pic->frameData.bScenecut = true; > bDup = false; > - x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", > pic->poc); > } > } > } > @@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* pic_in, > x265_picture* pic_out) > bool dontRead = false; > bool bdropFrame = false; > bool dropflag = false; > + bool isMaxThreshold = false; > > if (m_exportedPic) > { > @@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* pic_in, > x265_picture* pic_out) > { > double maxUVSad = 0.0, edgeSad = 0.0; > computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc); > - findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad); > + findSceneCuts(pic, isMaxThreshold, bdropFrame, maxUVSad, > edgeSad); > } > } > > @@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* pic_in, > 
x265_picture* pic_out) > if (m_param->bHistBasedSceneCut) > { > inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut > == 1) ? true : false; > + inFrame->m_lowres.m_bIsMaxThres = isMaxThreshold; > } > if (m_param->bHistBasedSceneCut && m_param->analysisSave) > { > @@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p) > > if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold) > { > - p->edgeTransitionThreshold = 0.01; > + p->edgeTransitionThreshold = 0.03; > x265_log(p, X265_LOG_WARNING, "using default threshold %.2lf for > scene cut detection\n", p->edgeTransitionThreshold); > } > > diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h > index fd6b3e72c..1d4fe2476 100644 > --- a/source/encoder/encoder.h > +++ b/source/encoder/encoder.h > @@ -373,7 +373,7 @@ public: > bool computeHistograms(x265_picture *pic); > void computeHistogramSAD(double *maxUVNormalizedSAD, double > *edgeNormalizedSAD, int curPoc); > double normalizeRange(int32_t value, int32_t minValue, int32_t > maxValue, double rangeStart, double rangeEnd); > - void findSceneCuts(x265_picture *pic, bool& bDup, double > m_maxUVSADVal, double m_edgeSADVal); > + void findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup, double > m_maxUVSADVal, double m_edgeSADVal); > > void initRefIdx(); > void analyseRefIdx(int *numRefIdx); > diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp > index 0a95e77d2..27052ca4e 100644 > --- a/source/encoder/slicetype.cpp > +++ b/source/encoder/slicetype.cpp > @@ -2001,10 +2001,40 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, > bool bKeyframe) > int numAnalyzed = numFrames; > bool isScenecut = false; > > - /* When scenecut threshold is set, use scenecut detection for I frame > placements */ > if (m_param->bHistBasedSceneCut) > - isScenecut = frames[1]->bScenecut; > - else > + { > + for (int i = numFrames - 1; i > 0; i--) > + { > + if (frames[i]->interPCostPercDiff > 0.0) > + continue; > + int64_t interCost = 
frames[i]->costEst[1][0]; > + int64_t intraCost = frames[i]->costEst[0][0]; > + if (interCost < 0 || intraCost < 0) > + continue; > + int times = 0; > + double averageP = 0.0, averageI = 0.0; > + for (int j = i - 1; j >= 0 && times < 5; j--, times++) > + { > + if (frames[j]->costEst[0][0] > 0 && > frames[j]->costEst[1][0] > 0) > + { > + averageI += frames[j]->costEst[0][0]; > + averageP += frames[j]->costEst[1][0]; > + } > + else > + times--; > + } > + if (times) > + { > + averageI = averageI / times; > + averageP = averageP / times; > + frames[i]->interPCostPercDiff = abs(interCost - averageP) > / X265_MIN(interCost, averageP) * 100; > + frames[i]->intraCostPercDiff = abs(intraCost - averageI) > / X265_MIN(intraCost, averageI) * 100; > + } > + } > + } > + > + /* When scenecut threshold is set, use scenecut detection for I frame > placements */ > + if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut && > frames[1]->bScenecut)) > isScenecut = scenecut(frames, 0, 1, true, origNumFrames); > > if (isScenecut && (m_param->bHistBasedSceneCut || > m_param->scenecutThreshold)) > @@ -2018,17 +2048,16 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, > bool bKeyframe) > m_extendGopBoundary = false; > for (int i = m_param->bframes + 1; i < origNumFrames; i += > m_param->bframes + 1) > { > - if (!m_param->bHistBasedSceneCut) > + if (!m_param->bHistBasedSceneCut || > (m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut)) > scenecut(frames, i, i + 1, true, origNumFrames); > > for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1, > origNumFrames); j++) > { > - if ((!m_param->bHistBasedSceneCut && frames[j]->bScenecut > && scenecutInternal(frames, j - 1, j, true)) || > - (m_param->bHistBasedSceneCut && frames[j]->bScenecut)) > - { > - m_extendGopBoundary = true; > - break; > - } > + if (frames[j]->bScenecut && scenecutInternal(frames, j - > 1, j, true)) > + { > + m_extendGopBoundary = true; > + break; > + } > } > if (m_extendGopBoundary) > break; > @@ 
-2133,14 +2162,15 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, > bool bKeyframe) > { > for (int j = 1; j < numBFrames + 1; j++) > { > - if ((!m_param->bHistBasedSceneCut && scenecut(frames, j, > j + 1, false, origNumFrames)) || > - (m_param->bHistBasedSceneCut && frames[j + > 1]->bScenecut) || > - (bForceRADL && (frames[j]->frameNum == preRADL))) > - { > - frames[j]->sliceType = X265_TYPE_P; > - numAnalyzed = j; > - break; > - } > + bool isNextScenecut = false; > + if (!m_param->bHistBasedSceneCut || > (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut)) > + isNextScenecut = scenecut(frames, j, j + 1, false, > origNumFrames); > + if (isNextScenecut || (bForceRADL && frames[j]->frameNum > == preRADL)) > + { > + frames[j]->sliceType = X265_TYPE_P; > + numAnalyzed = j; > + break; > + } > } > } > resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed > + 1); > @@ -2203,7 +2233,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0, > int p1, bool bRealScenecut, in > * and not considered a scenecut. */ > for (int cp1 = p1; cp1 <= maxp1; cp1++) > { > - if (!scenecutInternal(frames, p0, cp1, false)) > + if (!m_param->bHistBasedSceneCut && !scenecutInternal(frames, > p0, cp1, false)) > { > /* Any frame in between p0 and cur_p1 cannot be a real > scenecut. 
*/ > for (int i = cp1; i > p0; i--) > @@ -2212,7 +2242,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0, > int p1, bool bRealScenecut, in > noScenecuts = false; > } > } > - else if (scenecutInternal(frames, cp1 - 1, cp1, false)) > + else if ((m_param->bHistBasedSceneCut && > frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1, > false)) > { > /* If current frame is a Scenecut from p0 frame as well > as Scenecut from > * preceeding frame, mark it as a Scenecut */ > @@ -2273,6 +2303,10 @@ bool Lookahead::scenecut(Lowres **frames, int p0, > int p1, bool bRealScenecut, in > > if (!frames[p1]->bScenecut) > return false; > + /* Check only scene transitions if max threshold */ > + if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres) > + return frames[p1]->bScenecut; > + > return scenecutInternal(frames, p0, p1, bRealScenecut); > } > > @@ -2289,7 +2323,19 @@ bool Lookahead::scenecutInternal(Lowres **frames, > int p0, int p1, bool bRealScen > /* magic numbers pulled out of thin air */ > float threshMin = (float)(threshMax * 0.25); > double bias = m_param->scenecutBias; > - if (bRealScenecut) > + if (m_param->bHistBasedSceneCut) > + { > + double minT = 50.0 * (1 + m_param->edgeTransitionThreshold); > + if (frame->interPCostPercDiff > minT || frame->intraCostPercDiff > > minT) > + { > + if (bRealScenecut && frame->bScenecut) > + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", > frame->frameNum); > + return frame->bScenecut; > + } > + else > + return false; > + } > + else if (bRealScenecut) > { > if (m_param->keyframeMin == m_param->keyframeMax) > threshMin = threshMax; > diff --git a/source/x265.h b/source/x265.h > index 1e6f9ece6..32feb2bca 100644 > --- a/source/x265.h > +++ b/source/x265.h > @@ -1860,7 +1860,7 @@ typedef struct x265_param > /* A genuine threshold used for histogram based scene cut detection. 
> * This threshold determines whether a frame is a scenecut or not > * when compared against the edge and chroma histogram sad values. > - * Default 0.01. Range: Real number in the interval (0,2). */ > + * Default 0.03. Range: Real number in the interval (0,1). */ > double edgeTransitionThreshold; > > /* Enables histogram based scenecut detection algorithm to detect > scenecuts. Default disabled */ > -- > 2.24.0.windows.2 > >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel