Please ignore this email. On Thu, Nov 21, 2019 at 9:42 PM Srikanth Kurapati <srikanth.kurap...@multicorewareinc.com> wrote:
> # HG changeset patch > # User Srikanth Kurapati <srikanth.kurap...@multicorewareinc.com> > # Date 1573649311 -19800 > # Wed Nov 13 18:18:31 2019 +0530 > # Node ID 4685693a3c79ced0a7ab83927ed7b4781943d494 > # Parent 04db2bfee5d628d931d1407355b909ac8ff1c898 > Histogram based scenecut detection > > This patch does the following. > 1.Identifies scenecuts by thresholding against sad of edge and chroma > histograms. > 2.Add option "--hist-scenecut" to enable histogram based scenecut method. > 3.Add option "--hist-threshold" to provide threshold for determining > scene-cuts. > 3.Optimizes frame duplication through reuse of sad for marking duplicate > frames. > > diff -r 04db2bfee5d6 -r 4685693a3c79 doc/reST/cli.rst > --- a/doc/reST/cli.rst Thu Oct 31 16:23:27 2019 +0530 > +++ b/doc/reST/cli.rst Wed Nov 13 18:18:31 2019 +0530 > @@ -1426,7 +1426,20 @@ > This value represents the percentage difference between the inter cost > and > intra cost of a frame used in scenecut detection. For example, a value > of 5 indicates, > if the inter cost of a frame is greater than or equal to 95 percent of > the intra cost of the frame, > - then detect this frame as scenecut. Values between 5 and 15 are > recommended. Default 5. > + then detect this frame as scenecut. Values between 5 and 15 are > recommended. Default 5. > + > +.. option:: --hist-scenecut, --no-hist-scenecut > + > + Indicates that scenecuts need to be detected using luma edge and chroma > histograms. > + option: `--hist-scenecut` enables scenecut detection using the > histograms and disables the default scene cut algorithm. > + option: `--no-hist-scenecut` disables histogram based scenecut algorithm. > + > +.. option:: --hist-threshold <0.0..2.0> > + > + This value represents the threshold for normalized SAD of edge > histograms used in scenecut detection. > + This requires option: `--hist-scenecut` to be enabled. 
For example, a > value of 0.2 indicates that a frame with normalized SAD value > + greater than 0.2 against the previous frame as scenecut. > + Default 0.01. > > .. option:: --radl <integer> > > diff -r 04db2bfee5d6 -r 4685693a3c79 source/CMakeLists.txt > --- a/source/CMakeLists.txt Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/CMakeLists.txt Wed Nov 13 18:18:31 2019 +0530 > @@ -29,7 +29,7 @@ > option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) > mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) > # X265_BUILD must be incremented each time the public API is changed > -set(X265_BUILD 182) > +set(X265_BUILD 183) > configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" > "${PROJECT_BINARY_DIR}/x265.def") > configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" > diff -r 04db2bfee5d6 -r 4685693a3c79 source/common/common.h > --- a/source/common/common.h Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/common/common.h Wed Nov 13 18:18:31 2019 +0530 > @@ -129,12 +129,16 @@ > typedef uint64_t sum2_t; > typedef uint64_t pixel4; > typedef int64_t ssum2_t; > +#define HISTOGRAM_BINS 1024 > +#define SHIFT 1 > #else > typedef uint8_t pixel; > typedef uint16_t sum_t; > typedef uint32_t sum2_t; > typedef uint32_t pixel4; > typedef int32_t ssum2_t; // Signed sum > +#define HISTOGRAM_BINS 256 > +#define SHIFT 0 > #endif // if HIGH_BIT_DEPTH > > #if X265_DEPTH < 10 > diff -r 04db2bfee5d6 -r 4685693a3c79 source/common/param.cpp > --- a/source/common/param.cpp Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/common/param.cpp Wed Nov 13 18:18:31 2019 +0530 > @@ -167,6 +167,8 @@ > param->bFrameAdaptive = X265_B_ADAPT_TRELLIS; > param->bBPyramid = 1; > param->scenecutThreshold = 40; /* Magic number pulled in from x264 */ > + param->edgeTransitionThreshold = 0.01; > + param->bHistBasedSceneCut = false; > param->lookaheadSlices = 8; > param->lookaheadThreads = 0; > param->scenecutBias = 5.0; > @@ -572,6 +574,7 @@ > param->bframes = 0; > param->lookaheadDepth = 0; 
> param->scenecutThreshold = 0; > + param->bHistBasedSceneCut = false; > param->rc.cuTree = 0; > param->frameNumThreads = 1; > } > @@ -614,7 +617,7 @@ > return 0; > } > > -static int x265_atobool(const char* str, bool& bError) > +static bool x265_atobool(const char* str, bool& bError) > { > if (!strcmp(str, "1") || > !strcmp(str, "true") || > @@ -920,12 +923,13 @@ > OPT("lookahead-slices") p->lookaheadSlices = atoi(value); > OPT("scenecut") > { > - p->scenecutThreshold = atobool(value); > - if (bError || p->scenecutThreshold) > - { > - bError = false; > - p->scenecutThreshold = atoi(value); > - } > + p->scenecutThreshold = atobool(value); > + if (bError || p->scenecutThreshold) > + { > + bError = false; > + p->scenecutThreshold = atoi(value); > + p->bHistBasedSceneCut = false; > + } > } > OPT("temporal-layers") p->bEnableTemporalSubLayers = atobool(value); > OPT("keyint") p->keyframeMax = atoi(value); > @@ -1191,6 +1195,21 @@ > OPT("opt-ref-list-length-pps") p->bOptRefListLengthPPS = > atobool(value); > OPT("multi-pass-opt-rps") p->bMultiPassOptRPS = atobool(value); > OPT("scenecut-bias") p->scenecutBias = atof(value); > + OPT("hist-scenecut") > + { > + p->bHistBasedSceneCut = atobool(value); > + if (bError) > + { > + bError = false; > + p->bHistBasedSceneCut = false; > + } > + if (p->bHistBasedSceneCut) > + { > + bError = false; > + p->scenecutThreshold = 0; > + } > + } > + OPT("hist-threshold") p->edgeTransitionThreshold = atof(value); > OPT("lookahead-threads") p->lookaheadThreads = atoi(value); > OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value); > OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = > atobool(value); > @@ -1632,7 +1651,9 @@ > CHECK(param->scenecutThreshold < 0, > "scenecutThreshold must be greater than 0"); > CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias, > - "scenecut-bias must be between 0 and 100"); > + "scenecut-bias must be between 0 and 100"); > + CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 < > 
param->edgeTransitionThreshold, > + "hist-threshold must be between 0.0 and 2.0"); > CHECK(param->radl < 0 || param->radl > param->bframes, > "radl must be between 0 and bframes"); > CHECK(param->rdPenalty < 0 || param->rdPenalty > 2, > @@ -1792,9 +1813,13 @@ > x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge > : %s / %d / %d / %d\n", > x265_motion_est_names[param->searchMethod], > param->searchRange, param->subpelRefine, param->maxNumMergeCand); > > - if (param->keyframeMax != INT_MAX || param->scenecutThreshold) > - x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / > bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax, > param->scenecutThreshold, param->scenecutBias * 100); > - else > + if (param->scenecutThreshold && param->keyframeMax != INT_MAX) > + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / > bias : %d / %d / %d / %.2lf \n", > + param->keyframeMin, param->keyframeMax, > param->scenecutThreshold, param->scenecutBias * 100); > + else if (param->bHistBasedSceneCut && param->keyframeMax != INT_MAX) > + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / > edge threshold : %d / %d / %d / %.2lf\n", > + param->keyframeMin, param->keyframeMax, > param->bHistBasedSceneCut, param->edgeTransitionThreshold); > + else if (param->keyframeMax == INT_MAX) > x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut > : disabled\n"); > > if (param->cbQpOffset || param->crQpOffset) > @@ -1961,6 +1986,7 @@ > s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth); > s += sprintf(s, " lookahead-slices=%d", p->lookaheadSlices); > s += sprintf(s, " scenecut=%d", p->scenecutThreshold); > + s += sprintf(s, " hist-scenecut=%d", p->bHistBasedSceneCut); > s += sprintf(s, " radl=%d", p->radl); > BOOL(p->bEnableHRDConcatFlag, "splice"); > BOOL(p->bIntraRefresh, "intra-refresh"); > @@ -2108,6 +2134,7 @@ > BOOL(p->bOptRefListLengthPPS, "opt-ref-list-length-pps"); > BOOL(p->bMultiPassOptRPS, "multi-pass-opt-rps"); 
> s += sprintf(s, " scenecut-bias=%.2f", p->scenecutBias); > + s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold); > BOOL(p->bOptCUDeltaQP, "opt-cu-delta-qp"); > BOOL(p->bAQMotion, "aq-motion"); > BOOL(p->bEmitHDRSEI, "hdr"); > @@ -2261,6 +2288,7 @@ > dst->lookaheadSlices = src->lookaheadSlices; > dst->lookaheadThreads = src->lookaheadThreads; > dst->scenecutThreshold = src->scenecutThreshold; > + dst->bHistBasedSceneCut = src->bHistBasedSceneCut; > dst->bIntraRefresh = src->bIntraRefresh; > dst->maxCUSize = src->maxCUSize; > dst->minCUSize = src->minCUSize; > @@ -2420,6 +2448,7 @@ > dst->bOptRefListLengthPPS = src->bOptRefListLengthPPS; > dst->bMultiPassOptRPS = src->bMultiPassOptRPS; > dst->scenecutBias = src->scenecutBias; > + dst->edgeTransitionThreshold = src->edgeTransitionThreshold; > dst->gopLookahead = src->lookaheadDepth; > dst->bOptCUDeltaQP = src->bOptCUDeltaQP; > dst->analysisMultiPassDistortion = src->analysisMultiPassDistortion; > diff -r 04db2bfee5d6 -r 4685693a3c79 source/encoder/encoder.cpp > --- a/source/encoder/encoder.cpp Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/encoder/encoder.cpp Wed Nov 13 18:18:31 2019 +0530 > @@ -130,12 +130,17 @@ > #if SVT_HEVC > m_svtAppData = NULL; > #endif > - > m_prevTonemapPayload.payload = NULL; > m_startPoint = 0; > m_saveCTUSize = 0; > + m_edgePic = NULL; > + m_edgeHistThreshold = 0; > + m_chromaHistThreshold = 0.0; > + m_scaledEdgeThreshold = 0.0; > + m_scaledChromaThreshold = 0.0; > m_zoneIndex = 0; > } > + > inline char *strcatFilename(const char *input, const char *suffix) > { > char *output = X265_MALLOC(char, strlen(input) + strlen(suffix) + 1); > @@ -210,6 +215,23 @@ > } > } > > + if (m_param->bHistBasedSceneCut) > + { > + for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; > i++) > + { > + m_planeSizes[i] = m_param->sourceWidth * > m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]; > + } > + uint32_t pixelbytes = m_param->sourceBitDepth > 8 ? 
2 : 1; > + m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes); > + m_edgeHistThreshold = m_param->edgeTransitionThreshold; > + m_chromaHistThreshold = m_edgeHistThreshold * 10.0; > + m_chromaHistThreshold = x265_min(m_chromaHistThreshold, > MAX_SCENECUT_THRESHOLD); > + m_scaledEdgeThreshold = m_edgeHistThreshold * > SCENECUT_STRENGTH_FACTOR; > + m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold, > MAX_SCENECUT_THRESHOLD); > + m_scaledChromaThreshold = m_chromaHistThreshold * > SCENECUT_STRENGTH_FACTOR; > + m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold, > MAX_SCENECUT_THRESHOLD); > + } > + > // Do not allow WPP if only one row or fewer than 3 columns, it is > pointless and unstable > if (rows == 1 || cols < 3) > { > @@ -854,6 +876,12 @@ > } > } > > + if (m_param->bHistBasedSceneCut) > + { > + if(m_edgePic != NULL) > + X265_FREE_ZERO(m_edgePic); > + } > + > for (int i = 0; i < m_param->frameNumThreads; i++) > { > if (m_frameEncoder[i]) > @@ -1313,6 +1341,142 @@ > dest->planes[2] = (char*)dest->planes[1] + src->stride[1] * > (src->height >> x265_cli_csps[src->colorSpace].height[1]); > } > > +bool Encoder::computeHistograms(x265_picture *pic) > +{ > + pixel *src = (pixel*)pic->planes[0]; > + size_t bufSize = sizeof(pixel) * m_planeSizes[0]; > + int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes; > + int32_t numBytes = m_param->sourceBitDepth > 8 ? 
2 : 1; > + memset(m_edgePic, 0, bufSize * numBytes); > + > + if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, > pic->width, false)) > + { > + x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!"); > + return false; > + } > + > + pixel pixelVal; > + int64_t size = pic->height * (pic->stride[0] >> SHIFT); > + int32_t *edgeHist = m_curEdgeHist; > + memset(edgeHist, 0, 2 * sizeof(int32_t)); > + for (int64_t i = 0; i < size; i++) > + { > + if (!m_edgePic[i]) > + edgeHist[0]++; > + else > + edgeHist[1]++; > + } > + > + if (pic->colorSpace != X265_CSP_I400) > + { > + /* U Histogram Calculation */ > + int32_t HeightL = (pic->height >> > x265_cli_csps[pic->colorSpace].height[1]); > + size = HeightL * (pic->stride[1] >> SHIFT); > + int32_t *uHist = m_curUVHist[0]; > + pixel *chromaPlane = (pixel *)pic->planes[1]; > + > + memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t)); > + > + for (int64_t i = 0; i < size; i++) > + { > + pixelVal = chromaPlane[i]; > + uHist[pixelVal]++; > + } > + > + /* V Histogram Calculation */ > + if (planeCount == 3) > + { > + pixelVal = 0; > + int32_t heightV = (pic->height >> > x265_cli_csps[pic->colorSpace].height[2]); > + size = heightV * (pic->stride[2] >> SHIFT); > + int32_t *vHist = m_curUVHist[1]; > + chromaPlane = (pixel *)pic->planes[2]; > + > + memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t)); > + for (int64_t i = 0; i < size; i++) > + { > + pixelVal = chromaPlane[i]; > + vHist[pixelVal]++; > + } > + for (int i = 0; i < HISTOGRAM_BINS; i++) > + { > + m_curMaxUVHist[i] = x265_max(uHist[i], vHist[i]); > + } > + } > + else > + { /* in case of bi planar color space */ > + memcpy(m_curMaxUVHist, m_curUVHist[0], HISTOGRAM_BINS * > sizeof(int32_t)); > + } > + } > + return true; > +} > + > +void Encoder::computeHistogramSAD(double *maxUVNormalizedSad, double > *edgeNormalizedSad, int curPoc) > +{ > + > + if (curPoc == 0) > + { /* first frame is scenecut by default no sad computation for the > same. 
*/ > + *maxUVNormalizedSad = 0.0; > + *edgeNormalizedSad = 0.0; > + } > + else > + { > + /* compute sum of absolute difference of normalized histogram > bins for maxUV and edge histograms. */ > + int32_t edgefreqDiff = 0; > + int32_t maxUVfreqDiff = 0; > + double edgeProbabilityDiff = 0; > + > + for (int j = 0; j < HISTOGRAM_BINS; j++) > + { > + if (j < 2) > + { > + edgefreqDiff = abs(m_curEdgeHist[j] - m_prevEdgeHist[j]); > + edgeProbabilityDiff = (double) edgefreqDiff / > m_planeSizes[0]; > + *edgeNormalizedSad += edgeProbabilityDiff; > + } > + maxUVfreqDiff = abs(m_curMaxUVHist[j] - m_prevMaxUVHist[j]); > + *maxUVNormalizedSad += (double)maxUVfreqDiff / > m_planeSizes[2]; > + } > + } > + > + /* store histograms of previous frame for reference */ > + size_t bufsize = HISTOGRAM_BINS * sizeof(int32_t); > + memcpy(m_prevMaxUVHist, m_curMaxUVHist, bufsize); > + memcpy(m_prevEdgeHist, m_curEdgeHist, 2 * sizeof(int32_t)); > +} > + > +void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double > maxUVSad, double edgeSad) > +{ > + pic->frameData.bScenecut = false; > + > + if (pic->poc == 0) > + { > + /* for first frame */ > + pic->frameData.bScenecut = false; > + bDup = false; > + } > + else > + { > + if (edgeSad == 0.0 && maxUVSad == 0.0) > + { > + bDup = true; > + } > + else if (edgeSad > m_edgeHistThreshold && maxUVSad >= > m_chromaHistThreshold) > + { > + pic->frameData.bScenecut = true; > + bDup = false; > + } > + else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >= > m_scaledChromaThreshold) > + { > + pic->frameData.bScenecut = true; > + bDup = false; > + } > + } > + > + if (pic->frameData.bScenecut) > + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", pic->poc); > +} > + > /** > * Feed one new input frame into the encoder, get one frame out. 
If > pic_in is > * NULL, a flush condition is implied and pic_in must be NULL for all > subsequent > @@ -1339,6 +1503,8 @@ > const x265_picture* inputPic = NULL; > static int written = 0, read = 0; > bool dontRead = false; > + bool bdropFrame = false; > + bool dropflag = false; > > if (m_exportedPic) > { > @@ -1350,6 +1516,17 @@ > } > if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum < > m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in && > (read < written))) > { > + if (m_param->bHistBasedSceneCut && pic_in) > + { > + x265_picture *pic = (x265_picture *) pic_in; > + if (computeHistograms(pic)) > + { > + double maxUVSad = 0.0, edgeSad = 0.0; > + computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc); > + findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad); > + } > + } > + > if ((m_param->bEnableFrameDuplication && !pic_in && (read < > written))) > dontRead = true; > else > @@ -1368,7 +1545,7 @@ > if (pic_in->bitDepth < 8 || pic_in->bitDepth > 16) > { > x265_log(m_param, X265_LOG_ERROR, "Input bit depth (%d) > must be between 8 and 16\n", > - pic_in->bitDepth); > + pic_in->bitDepth); > return -1; > } > } > @@ -1393,9 +1570,27 @@ > written++; > } > > - psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, > m_dupBuffer[1]->dupPic, m_param); > - > - if (psnrWeight >= m_param->dupThreshold) > + if (m_param->bEnableFrameDuplication && > m_param->bHistBasedSceneCut) > + { > + if (!bdropFrame && > m_dupBuffer[1]->dupPic->frameData.bScenecut == false) > + { > + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, > m_dupBuffer[1]->dupPic, m_param); > + if (psnrWeight >= m_param->dupThreshold) > + dropflag = true; > + } > + else > + { > + dropflag = true; > + } > + } > + else if (m_param->bEnableFrameDuplication) > + { > + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, > m_dupBuffer[1]->dupPic, m_param); > + if (psnrWeight >= m_param->dupThreshold) > + dropflag = true; > + } > + > + if (dropflag) > { > if (m_dupBuffer[0]->bDup) > { > @@ -1428,7 +1623,7 @@ > 
inputPic = pic_in; > > Frame *inFrame; > - x265_param* p = (m_reconfigure || m_reconfigureRc) ? > m_latestParam : m_param; > + x265_param *p = (m_reconfigure || m_reconfigureRc) ? > m_latestParam : m_param; > if (m_dpb->m_freeList.empty()) > { > inFrame = new Frame; > @@ -1498,6 +1693,10 @@ > inFrame->m_poc = ++m_pocLast; > inFrame->m_userData = inputPic->userData; > inFrame->m_pts = inputPic->pts; > + if (m_param->bHistBasedSceneCut) > + { > + inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut > == 1) ? true : false; > + } > inFrame->m_forceqp = inputPic->forceqp; > inFrame->m_param = (m_reconfigure || m_reconfigureRc) ? > m_latestParam : m_param; > inFrame->m_picStruct = inputPic->picStruct; > @@ -3209,6 +3408,7 @@ > * adaptive I frame placement */ > p->keyframeMax = INT_MAX; > p->scenecutThreshold = 0; > + p->bHistBasedSceneCut = 0; > } > else if (p->keyframeMax <= 1) > { > @@ -3222,6 +3422,7 @@ > p->lookaheadDepth = 0; > p->bframes = 0; > p->scenecutThreshold = 0; > + p->bHistBasedSceneCut = 0; > p->bFrameAdaptive = 0; > p->rc.cuTree = 0; > p->bEnableWeightedPred = 0; > @@ -3881,6 +4082,13 @@ > m_param->searchMethod = m_param->hmeSearchMethod[2]; > } > } > + > + if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold) > + { > + p->edgeTransitionThreshold = 0.01; > + x265_log(p, X265_LOG_WARNING, "using default threshold %.2lf for > scene cut detection\n", p->edgeTransitionThreshold); > + } > + > } > > void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, > const x265_picture* picIn, int paramBytes) > diff -r 04db2bfee5d6 -r 4685693a3c79 source/encoder/encoder.h > --- a/source/encoder/encoder.h Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/encoder/encoder.h Wed Nov 13 18:18:31 2019 +0530 > @@ -156,7 +156,6 @@ > bool bDup; > }; > > - > class FrameEncoder; > class DPB; > class Lookahead; > @@ -164,6 +163,9 @@ > class ThreadPool; > class FrameData; > > +#define MAX_SCENECUT_THRESHOLD 2.0 > +#define SCENECUT_STRENGTH_FACTOR 2.0 > + > 
class Encoder : public x265_encoder > { > public: > @@ -228,7 +230,7 @@ > bool m_reconfigureRc; > bool m_reconfigureZone; > > - int m_saveCtuDistortionLevel; > + int m_saveCtuDistortionLevel; > > /* Begin intra refresh when one not in progress or else begin one as > soon as the current > * one is done. Requires bIntraRefresh to be set.*/ > @@ -245,11 +247,24 @@ > Lock m_rpsInSpsLock; > int m_rpsInSpsCount; > /* For HDR*/ > - double m_cB; > - double m_cR; > + double m_cB; > + double m_cR; > + > + int m_bToneMap; // Enables tone-mapping > + int m_enableNal; > > - int m_bToneMap; // Enables tone-mapping > - int m_enableNal; > + /* For histogram based scene-cut detection */ > + pixel* m_edgePic; > + int32_t m_curUVHist[2][HISTOGRAM_BINS]; > + int32_t m_curMaxUVHist[HISTOGRAM_BINS]; > + int32_t m_prevMaxUVHist[HISTOGRAM_BINS]; > + int32_t m_curEdgeHist[2]; > + int32_t m_prevEdgeHist[2]; > + uint32_t m_planeSizes[3]; > + double m_edgeHistThreshold; > + double m_chromaHistThreshold; > + double m_scaledEdgeThreshold; > + double m_scaledChromaThreshold; > > #ifdef ENABLE_HDR10_PLUS > const hdr10plus_api *m_hdr10plus_api; > @@ -355,6 +370,10 @@ > > void copyPicture(x265_picture *dest, const x265_picture *src); > > + bool computeHistograms(x265_picture *pic); > + void computeHistogramSAD(double *maxUVNormalizedSAD, double > *edgeNormalizedSAD, int curPoc); > + void findSceneCuts(x265_picture *pic, bool& bDup, double > m_maxUVSADVal, double m_edgeSADVal); > + > void initRefIdx(); > void analyseRefIdx(int *numRefIdx); > void updateRefIdx(); > diff -r 04db2bfee5d6 -r 4685693a3c79 source/encoder/ratecontrol.cpp > --- a/source/encoder/ratecontrol.cpp Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/encoder/ratecontrol.cpp Wed Nov 13 18:18:31 2019 +0530 > @@ -508,6 +508,7 @@ > CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP); > CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax); > CMP_OPT_FIRST_PASS("scenecut", > m_param->scenecutThreshold); > + CMP_OPT_FIRST_PASS("hist-threshold", 
> m_param->edgeTransitionThreshold); > CMP_OPT_FIRST_PASS("intra-refresh", > m_param->bIntraRefresh); > if (m_param->bMultiPassOptRPS) > { > @@ -1200,6 +1201,7 @@ > m_param->rc.bStatRead = 0; > m_param->bFrameAdaptive = 0; > m_param->scenecutThreshold = 0; > + m_param->bHistBasedSceneCut = false; > m_param->rc.cuTree = 0; > if (m_param->bframes > 1) > m_param->bframes = 1; > @@ -2284,7 +2286,7 @@ > if (m_isVbv && m_currentSatd > 0 && curFrame) > { > if (m_param->lookaheadDepth || m_param->rc.cuTree || > - m_param->scenecutThreshold || > + (m_param->scenecutThreshold || m_param->bHistBasedSceneCut) || > (m_param->bFrameAdaptive && m_param->bframes)) > { > /* Lookahead VBV: If lookahead is done, raise the quantizer as > necessary > diff -r 04db2bfee5d6 -r 4685693a3c79 source/encoder/slicetype.cpp > --- a/source/encoder/slicetype.cpp Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/encoder/slicetype.cpp Wed Nov 13 18:18:31 2019 +0530 > @@ -85,6 +85,69 @@ > > } // end anonymous namespace > > +namespace X265_NS { > + > +bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, > intptr_t stride, int height, int width, bool bcalcTheta) > +{ > + intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = > 0, colThree = 0; > + intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, > bottomRight = 0; > + > + const int startIndex = 1; > + > + if (!edgePic || !refPic || (!edgeTheta && bcalcTheta)) > + { > + return false; > + } > + else > + { > + float gradientH = 0, gradientV = 0, radians = 0, theta = 0; > + float gradientMagnitude = 0; > + pixel blackPixel = 0; > + > + //Applying Sobel filter expect for border pixels > + height = height - startIndex; > + width = width - startIndex; > + for (int rowNum = startIndex; rowNum < height; rowNum++) > + { > + rowTwo = rowNum * stride; > + rowOne = rowTwo - stride; > + rowThree = rowTwo + stride; > + > + for (int colNum = startIndex; colNum < width; colNum++) > + { > + > + /* Horizontal and vertical gradients > 
+ [ -3 0 3 ] [-3 -10 -3 ] > + gH =[ -10 0 10] gV = [ 0 0 0 ] > + [ -3 0 3 ] [ 3 10 3 ] */ > + > + colTwo = colNum; > + colOne = colTwo - startIndex; > + colThree = colTwo + startIndex; > + middle = rowTwo + colTwo; > + topLeft = rowOne + colOne; > + topRight = rowOne + colThree; > + bottomLeft = rowThree + colOne; > + bottomRight = rowThree + colThree; > + gradientH = (float)(-3 * refPic[topLeft] + 3 * > refPic[topRight] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo + > colThree] - 3 * refPic[bottomLeft] + 3 * refPic[bottomRight]); > + gradientV = (float)(-3 * refPic[topLeft] - 10 * > refPic[rowOne + colTwo] - 3 * refPic[topRight] + 3 * refPic[bottomLeft] + > 10 * refPic[rowThree + colTwo] + 3 * refPic[bottomRight]); > + gradientMagnitude = sqrtf(gradientH * gradientH + > gradientV * gradientV); > + if(bcalcTheta) > + { > + edgeTheta[middle] = 0; > + radians = atan2(gradientV, gradientH); > + theta = (float)((radians * 180) / PI); > + if (theta < 0) > + theta = 180 + theta; > + edgeTheta[middle] = (pixel)theta; > + } > + edgePic[middle] = (pixel)(gradientMagnitude >= > edgeThreshold ? 
edgeThreshold : blackPixel); > + } > + } > + return true; > + } > +} > + > void edgeFilter(Frame *curFrame, x265_param* param) > { > int height = curFrame->m_fencPic->m_picHeight; > @@ -114,6 +177,7 @@ > //Applying Gaussian filter on the picture > src = (pixel*)curFrame->m_fencPic->m_picOrg[0]; > refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY > * stride + curFrame->m_fencPic->m_lumaMarginX; > + edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * > stride + curFrame->m_fencPic->m_lumaMarginX; > pixel pixelValue = 0; > > for (int rowNum = 0; rowNum < height; rowNum++) > @@ -146,51 +210,8 @@ > } > } > > -#if HIGH_BIT_DEPTH //10-bit build > - float threshold = 1023; > - pixel whitePixel = 1023; > -#else > - float threshold = 255; > - pixel whitePixel = 255; > -#endif > -#define PI 3.14159265 > - > - float gradientH = 0, gradientV = 0, radians = 0, theta = 0; > - float gradientMagnitude = 0; > - pixel blackPixel = 0; > - edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * > stride + curFrame->m_fencPic->m_lumaMarginX; > - //Applying Sobel filter on the gaussian filtered picture > - for (int rowNum = 0; rowNum < height; rowNum++) > - { > - for (int colNum = 0; colNum < width; colNum++) > - { > - edgeTheta[(rowNum*stride) + colNum] = 0; > - if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1) > && (colNum != width - 1)) //Ignoring the border pixels of the picture > - { > - /*Horizontal and vertical gradients > - [ -3 0 3 ] [-3 -10 -3 ] > - gH = [ -10 0 10] gV = [ 0 0 0 ] > - [ -3 0 3 ] [ 3 10 3 ]*/ > - > - const intptr_t rowOne = (rowNum - 1)*stride, colOne = > colNum -1; > - const intptr_t rowTwo = rowNum * stride, colTwo = colNum; > - const intptr_t rowThree = (rowNum + 1)*stride, colThree = > colNum + 1; > - const intptr_t index = (rowNum*stride) + colNum; > - > - gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 * > refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 * > refPic[rowTwo + 
colThree] - 3 * refPic[rowThree + colOne] + 3 * > refPic[rowThree + colThree]); > - gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 * > refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 * > refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 * > refPic[rowThree + colThree]); > - > - gradientMagnitude = sqrtf(gradientH * gradientH + > gradientV * gradientV); > - radians = atan2(gradientV, gradientH); > - theta = (float)((radians * 180) / PI); > - if (theta < 0) > - theta = 180 + theta; > - edgeTheta[(rowNum*stride) + colNum] = (pixel)theta; > - > - edgePic[index] = gradientMagnitude >= threshold ? > whitePixel : blackPixel; > - } > - } > - } > + if(!computeEdge(edgePic, refPic, edgeTheta, stride, height, width, > true)) > + x265_log(NULL, X265_LOG_ERROR, "Failed edge computation!"); > } > > //Find the angle of a block by averaging the pixel angles > @@ -1471,7 +1492,7 @@ > > if (m_lastNonB && !m_param->rc.bStatRead && > ((m_param->bFrameAdaptive && m_param->bframes) || > - m_param->rc.cuTree || m_param->scenecutThreshold || > + m_param->rc.cuTree || m_param->scenecutThreshold || > m_param->bHistBasedSceneCut || > (m_param->lookaheadDepth && m_param->rc.vbvBufferSize))) > { > slicetypeAnalyse(frames, false); > @@ -1971,10 +1992,15 @@ > > int numBFrames = 0; > int numAnalyzed = numFrames; > - bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames); > + bool isScenecut = false; > > /* When scenecut threshold is set, use scenecut detection for I frame > placements */ > - if (m_param->scenecutThreshold && isScenecut) > + if (m_param->scenecutThreshold) > + isScenecut = scenecut(frames, 0, 1, true, origNumFrames); > + else if (m_param->bHistBasedSceneCut) > + isScenecut = frames[1]->bScenecut; > + > + if (isScenecut) > { > frames[1]->sliceType = X265_TYPE_I; > return; > @@ -1985,14 +2011,17 @@ > m_extendGopBoundary = false; > for (int i = m_param->bframes + 1; i < origNumFrames; i += > m_param->bframes + 1) > { > - scenecut(frames, i, i + 
1, true, origNumFrames); > + if (m_param->scenecutThreshold) > + scenecut(frames, i, i + 1, true, origNumFrames); > + > for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1, > origNumFrames); j++) > { > - if (frames[j]->bScenecut && scenecutInternal(frames, j - > 1, j, true) ) > - { > - m_extendGopBoundary = true; > - break; > - } > + if (( m_param->scenecutThreshold && frames[j]->bScenecut > && scenecutInternal(frames, j - 1, j, true)) || > + (m_param->bHistBasedSceneCut && frames[j]->bScenecut)) > + { > + m_extendGopBoundary = true; > + break; > + } > } > if (m_extendGopBoundary) > break; > @@ -2097,13 +2126,14 @@ > { > for (int j = 1; j < numBFrames + 1; j++) > { > - if (scenecut(frames, j, j + 1, false, origNumFrames) || > + if ((m_param->scenecutThreshold && scenecut(frames, j, j > + 1, false, origNumFrames)) || > + (m_param->bHistBasedSceneCut && frames[j + > 1]->bScenecut) || > (bForceRADL && (frames[j]->frameNum == preRADL))) > - { > - frames[j]->sliceType = X265_TYPE_P; > - numAnalyzed = j; > - break; > - } > + { > + frames[j]->sliceType = X265_TYPE_P; > + numAnalyzed = j; > + break; > + } > } > } > resetStart = bKeyframe ? 
1 : X265_MIN(numBFrames + 2, numAnalyzed > + 1); > @@ -3289,3 +3319,5 @@ > fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq; > fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost, > LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT)); > } > + > +} > diff -r 04db2bfee5d6 -r 4685693a3c79 source/encoder/slicetype.h > --- a/source/encoder/slicetype.h Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/encoder/slicetype.h Wed Nov 13 18:18:31 2019 +0530 > @@ -43,6 +43,13 @@ > #define AQ_EDGE_BIAS 0.5 > #define EDGE_INCLINATION 45 > > +#ifdef HIGH_BIT_DEPTH > +#define edgeThreshold 1023.0 > +#else > +#define edgeThreshold 255.0 > +#endif > +#define PI 3.14159265 > + > /* Thread local data for lookahead tasks */ > struct LookaheadTLD > { > @@ -258,6 +265,7 @@ > CostEstimateGroup& operator=(const CostEstimateGroup&); > }; > > -} > +bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, > intptr_t stride, int height, int width, bool bcalcTheta); > > +} > #endif // ifndef X265_SLICETYPE_H > diff -r 04db2bfee5d6 -r 4685693a3c79 source/test/regression-tests.txt > --- a/source/test/regression-tests.txt Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/test/regression-tests.txt Wed Nov 13 18:18:31 2019 +0530 > @@ -159,6 +159,8 @@ > Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60 > --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 > Kimono1_1920x1080_24_400.yuv,--preset superfast --qp 28 --zones 0,139,q=32 > Island_960x540_420p_8bit_24fps.yuv,--no-cutree --aq-mode 0 --bitrate 6000 > --scenecut-aware-qp > +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut > --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 > --vbv-bufsize 15000 --vbv-maxrate 12000 > +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut > --hist-threshold 0.02 > > # Main12 intraCost overflow bug test > 720p50_parkrun_ter.y4m,--preset medium > diff -r 04db2bfee5d6 -r 4685693a3c79 source/x265.h > --- 
a/source/x265.h Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/x265.h Wed Nov 13 18:18:31 2019 +0530 > @@ -1024,7 +1024,8 @@ > int lookaheadSlices; > > /* An arbitrary threshold which determines how aggressively the > lookahead > - * should detect scene cuts. The default (40) is recommended. */ > + * should detect scene cuts for cost based scenecut detection. > + * The default (40) is recommended. */ > int scenecutThreshold; > > /* Replace keyframes by using a column of intra blocks that move > across the video > @@ -1839,14 +1840,24 @@ > * Default is disabled. */ > int bEnableSceneCutAwareQp; > > - /*The duration(in milliseconds) for which there is a reduction in the > bits spent on the inter-frames after a scenecut > + /* The duration(in milliseconds) for which there is a reduction in > the bits spent on the inter-frames after a scenecut > * by increasing their QP, when bEnableSceneCutAwareQp is set. > Default is 500ms.*/ > int scenecutWindow; > > /* The offset by which QP is incremented for inter-frames when > bEnableSceneCutAwareQp is set. > * Default is +5. */ > int maxQpDelta; > + > + /* A genuine threshold used for histogram based scene cut detection. > + * This threshold determines whether a frame is a scenecut or not > + * when compared against the edge and chroma histogram sad values. > + * Default 0.01. Range: Real number in the interval (0,2). */ > + double edgeTransitionThreshold; > + > + /* Enables histogram based scenecut detection algorithm to detect > scenecuts. Default disabled */ > + bool bHistBasedSceneCut; > } x265_param; > + > /* x265_param_alloc: > * Allocates an x265_param instance. 
The returned param structure is not > * special in any way, but using this method together with > x265_param_free() > diff -r 04db2bfee5d6 -r 4685693a3c79 source/x265cli.h > --- a/source/x265cli.h Thu Oct 31 16:23:27 2019 +0530 > +++ b/source/x265cli.h Wed Nov 13 18:18:31 2019 +0530 > @@ -129,6 +129,9 @@ > { "scenecut", required_argument, NULL, 0 }, > { "no-scenecut", no_argument, NULL, 0 }, > { "scenecut-bias", required_argument, NULL, 0 }, > + { "hist-scenecut", no_argument, NULL, 0}, > + { "no-hist-scenecut", no_argument, NULL, 0}, > + { "hist-threshold", required_argument, NULL, 0}, > { "fades", no_argument, NULL, 0 }, > { "no-fades", no_argument, NULL, 0 }, > { "scenecut-aware-qp", no_argument, NULL, 0 }, > @@ -489,7 +492,10 @@ > H0(" --gop-lookahead <integer> Extends gop boundary if a > scenecut is found within this from keyint boundary. Default 0\n"); > H0(" --no-scenecut Disable adaptive I-frame > decision\n"); > H0(" --scenecut <integer> How aggressively to insert extra > I-frames. Default %d\n", param->scenecutThreshold); > - H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. > Default %.2f\n", param->scenecutBias); > + H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. > Default %.2f\n", param->scenecutBias); > + H0(" --hist-scenecut Enables histogram based > scene-cut detection using histogram based algorithm.\n"); > + H0(" --no-hist-scenecut Disables histogram based > scene-cut detection using histogram based algorithm.\n"); > + H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized > SAD threshold for histogram based scenecut detection Default %.2f\n", > param->edgeTransitionThreshold); > H0(" --[no-]fades Enable detection and handling of > fade-in regions. Default %s\n", OPT(param->bEnableFades)); > H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames > inside the scenecut window after scenecut. 
Default %s\n", > OPT(param->bEnableSceneCutAwareQp)); > H1(" --scenecut-window <0..1000> QP incremental duration(in > milliseconds) when scenecut-aware-qp is enabled. Default %d\n", > param->scenecutWindow); > > -- > *With Regards,* > *Srikanth Kurapati.* > -- *With Regards,* *Srikanth Kurapati.*
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel