Thanks for this additional explanation, and thanks again for your contribution!
*From:* x265-devel [mailto:[email protected]] *On Behalf Of* Ximing Cheng *Sent:* Friday, August 11, 2017 12:32 PM *To:* Ximing Cheng *Subject:* Re: [x265] [PATCH] intra: skip RD analysis when sum of sub CU split cost bigger than non-split cost

In fact, this skip is not a fast-skip algorithm. When the sum of the split costs is already larger than the non-split CU's best cost (both the RD cost of the sub-CUs and that of the non-split CU are taken without the split-flag cost), splitting into 4 parts at this CU depth is worse than not splitting. Therefore, analysing the remaining N * 1/4 parts of the CU is useless.

.....................
.    A    .    B    .
.         .         .
.....................
.    C    .    D    .
.         .         .
.....................
(A, B, C and D are the 4 parts of a CU)

If the sum of the sub-CU split costs computed so far (A_Cost + B_Cost) is larger than the non-split cost (NSCost), i.e. NSCost < A_Cost + B_Cost, suppose the remaining parts (C, D) continued with RD analysis. Then:

C_Cost + D_Cost >= 0
---> NSCost < A_Cost + B_Cost + C_Cost + D_Cost
---> (likely that) NSCost + splitCost(splitflag = 0) < A_Cost + B_Cost + C_Cost + D_Cost + splitCost(splitflag = 1)
---> choose non-split

So the RD analysis of C and D can be skipped. In my test cases, the MD5 checksum of the output bitstream is the same as the original after this skip.
------------------ Original ------------------ *From: * "Ximing Cheng";<[email protected]>; *Send time:* Friday, Aug 4, 2017 1:56 AM *To:* "x265-devel"<[email protected]>; *Subject: * [x265] [PATCH] intra: skip RD analysis when sum of sub CUsplitcostbigger than non-split cost # HG changeset patch # User Ximing Cheng <[email protected]> # Date 1501782508 -28800 # Fri Aug 04 01:48:28 2017 +0800 # Node ID 5943a1f73d5814a3a723f814a4dd0635b1fe2b35 # Parent d11482e5fedbcdaf62ee3c6872f43827d99ad181 intra: skip RD analysis when sum of sub CUsplitcost bigger than non-split cost diff -r d11482e5fedb -r 5943a1f73d58 source/CMakeLists.txt --- a/source/CMakeLists.txt Mon Jul 24 11:15:38 2017 +0530 +++ b/source/CMakeLists.txt Fri Aug 04 01:48:28 2017 +0800 @@ -29,7 +29,7 @@ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 131) +set(X265_BUILD 132) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" diff -r d11482e5fedb -r 5943a1f73d58 source/common/param.cpp --- a/source/common/param.cpp Mon Jul 24 11:15:38 2017 +0530 +++ b/source/common/param.cpp Fri Aug 04 01:48:28 2017 +0800 @@ -157,6 +157,7 @@ param->bEnableConstrainedIntra = 0; param->bEnableStrongIntraSmoothing = 1; param->bEnableFastIntra = 0; + param->bEnableSplitRdSkip = 0; /* Inter Coding tools */ param->searchMethod = X265_HEX_SEARCH; @@ -975,6 +976,7 @@ OPT("refine-inter")p->interRefine = atobool(value); OPT("refine-mv")p->mvRefine = atobool(value); OPT("force-flush")p->forceFlush = atoi(value); + OPT("splitrd-skip") p->bEnableSplitRdSkip = atobool(value); else return X265_PARAM_BAD_NAME; } @@ -1431,6 +1433,7 @@ TOOLOPT(param->bEnableRdRefine, "rd-refine"); TOOLOPT(param->bEnableEarlySkip, "early-skip"); TOOLOPT(param->bEnableRecursionSkip, "rskip"); + 
TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip"); TOOLVAL(param->noiseReductionIntra, "nr-intra=%d"); TOOLVAL(param->noiseReductionInter, "nr-inter=%d"); TOOLOPT(param->bEnableTSkipFast, "tskip-fast"); @@ -1560,6 +1563,7 @@ BOOL(p->bEnableTSkipFast, "tskip-fast"); BOOL(p->bCULossless, "cu-lossless"); BOOL(p->bIntraInBFrames, "b-intra"); + BOOL(p->bEnableSplitRdSkip, "splitrd-skip"); s += sprintf(s, " rdpenalty=%d", p->rdPenalty); s += sprintf(s, " psy-rd=%.2f", p->psyRd); s += sprintf(s, " psy-rdoq=%.2f", p->psyRdoq); diff -r d11482e5fedb -r 5943a1f73d58 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Jul 24 11:15:38 2017 +0530 +++ b/source/encoder/analysis.cpp Fri Aug 04 01:48:28 2017 +0800 @@ -485,7 +485,7 @@ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx); } -void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp) +uint64_t Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp) { uint32_t depth = cuGeom.depth; ModeDepth& md = m_modeDepth[depth]; @@ -560,6 +560,8 @@ invalidateContexts(nextDepth); Entropy* nextContext = &m_rqt[depth].cur; int32_t nextQP = qp; + uint64_t curCost = 0; + int skipSplitCheck = 0; for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++) { @@ -572,7 +574,17 @@ if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth) nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom)); - compressIntraCU(parentCTU, childGeom, nextQP); + if (m_param->bEnableSplitRdSkip) + { + curCost += compressIntraCU(parentCTU, childGeom, nextQP); + if (m_modeDepth[depth].bestMode && curCost > m_modeDepth[depth].bestMode->rdCost) + { + skipSplitCheck = 1; + break; + } + } + else + compressIntraCU(parentCTU, childGeom, nextQP); // Save best CU and pred data for this sub CU splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx); @@ -590,14 +602,17 @@ memset(parentCTU.m_cuDepth + 
childGeom.absPartIdx, 0, childGeom.numPartitions); } } - nextContext->store(splitPred->contexts); - if (mightNotSplit) - addSplitFlagCost(*splitPred, cuGeom.depth); - else - updateModeCost(*splitPred); - - checkDQPForSplitPred(*splitPred, cuGeom); - checkBestMode(*splitPred, depth); + if (!skipSplitCheck) + { + nextContext->store(splitPred->contexts); + if (mightNotSplit) + addSplitFlagCost(*splitPred, cuGeom.depth); + else + updateModeCost(*splitPred); + + checkDQPForSplitPred(*splitPred, cuGeom); + checkBestMode(*splitPred, depth); + } } if (m_param->bEnableRdRefine && depth <= m_slice->m_pps->maxCuDQPDepth) @@ -620,6 +635,8 @@ md.bestMode->cu.copyToPic(depth); if (md.bestMode != &md.pred[PRED_SPLIT]) md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx); + + return md.bestMode->rdCost; } void Analysis::PMODE::processTasks(int workerThreadId) diff -r d11482e5fedb -r 5943a1f73d58 source/encoder/analysis.h --- a/source/encoder/analysis.h Mon Jul 24 11:15:38 2017 +0530 +++ b/source/encoder/analysis.h Fri Aug 04 01:48:28 2017 +0800 @@ -145,7 +145,7 @@ void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, int32_t lqp); /* full analysis for an I-slice CU */ - void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp); + uint64_t compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp); /* full analysis for a P or B slice CU */ uint32_t compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp); diff -r d11482e5fedb -r 5943a1f73d58 source/x265.h --- a/source/x265.h Mon Jul 24 11:15:38 2017 +0530 +++ b/source/x265.h Fri Aug 04 01:48:28 2017 +0800 @@ -1482,6 +1482,9 @@ /* Force flushing the frames from encoder */ int forceFlush; + + /* Enable skipping split RD analysis when sum of split CU rdCost larger than none split CU rdCost for Intra CU */ + int bEnableSplitRdSkip; } x265_param; /* x265_param_alloc: diff -r d11482e5fedb -r 5943a1f73d58 
source/x265cli.h --- a/source/x265cli.h Mon Jul 24 11:15:38 2017 +0530 +++ b/source/x265cli.h Fri Aug 04 01:48:28 2017 +0800 @@ -281,6 +281,8 @@ { "refine-mv", no_argument, NULL, 0 }, { "no-refine-mv", no_argument, NULL, 0 }, { "force-flush", required_argument, NULL, 0 }, + { "splitrd-skip", no_argument, NULL, 0 }, + { "no-splitrd-skip", no_argument, NULL, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, @@ -375,6 +377,7 @@ H0(" --[no-]early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip)); H0(" --[no-]rskip Enable early exit from recursion. Default %s\n", OPT(param->bEnableRecursionSkip)); H1(" --[no-]tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast)); + H1(" --[no-]splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than none split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip)); H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n"); H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. Default 0\n"); H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n" _______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
