[x265] [PATCH] Added fast intra search option to Analysis::checkIntraInInter_rd0_4
# HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1408026426 25200 # Node ID 81766e60e622f28c12766f277b087cfeccff9cc3 # Parent 6b741cce14acb610a2a17a08f51898ea18b16a35 Added fast intra search option to Analysis::checkIntraInInter_rd0_4 diff -r 6b741cce14ac -r 81766e60e622 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Aug 14 12:53:52 2014 +0530 +++ b/source/encoder/analysis.cpp Thu Aug 14 07:27:06 2014 -0700 @@ -26,6 +26,7 @@ #include common.h #include rdcost.h #include encoder.h +#include predict.h #include PPA/ppa.h using namespace x265; @@ -1655,6 +1656,7 @@ } pixelcmp_t sa8d = primitives.sa8d[sizeIdx]; +int predsize = scaleTuSize * scaleTuSize; uint32_t preds[3]; cu-getIntraDirLumaPredictor(partOffset, preds); @@ -1685,23 +1687,79 @@ bits = !(mpms ((uint64_t)1 mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth); cost = m_rdCost.calcRdSADCost(sad, bits); COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits); - + // Transpose NxN primitives.transpose[sizeIdx](buf_trans, fenc, scaleStride); - primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize = 16)); -for (mode = 2; mode 35; mode++) +bool modeHor; +pixel *cmp; +intptr_t srcStride; +if (m_param-bEnableFastIntra) { -bool modeHor = (mode 18); -pixel *cmp = (modeHor ? buf_trans : fenc); -intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride); -sad = sa8d(cmp, srcStride, tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) costShift; +int lowsad, highsad, asad = 0; +uint32_t lowbits, highbits, amode, lowmode, highmode, abits = 0; +uint64_t lowcost, highcost = MAX_INT64, acost = MAX_INT64; + +for (mode = 4;mode 35; mode += 5) +{ +modeHor = (mode 18); +cmp = (modeHor ? buf_trans : fenc); +srcStride = (modeHor ? scaleTuSize : scaleStride); +sad = sa8d(cmp, srcStride, tmp[(mode - 2) * predsize], scaleTuSize) costShift; +bits = !(mpms ((uint64_t)1 mode)) ? 
rbits : xModeBitsIntra(cu, mode, partOffset, depth); +cost = m_rdCost.calcRdSADCost(sad, bits); +COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits); +} +lowmode = amode - 2; +modeHor = (lowmode 18); +cmp = (modeHor ? buf_trans : fenc); +srcStride = (modeHor ? scaleTuSize : scaleStride); +lowsad = sa8d(cmp, srcStride, tmp[(lowmode - 2) * predsize], scaleTuSize) costShift; +lowbits = !(mpms ((uint64_t)1 lowmode)) ? rbits : xModeBitsIntra(cu, lowmode, partOffset, depth); +lowcost = m_rdCost.calcRdSADCost(lowsad, lowbits); +if (bmode 34) +{ +highmode = amode + 2; +modeHor = (highmode 18); +cmp = (modeHor ? buf_trans : fenc); +srcStride = (modeHor ? scaleTuSize : scaleStride); +highsad = sa8d(cmp, srcStride, tmp[(highmode - 2) * predsize], scaleTuSize) costShift; +highbits = !(mpms ((uint64_t)1 highmode)) ? rbits : xModeBitsIntra(cu, highmode, partOffset, depth); +highcost = m_rdCost.calcRdSADCost(highsad, highbits); +} +if (lowcost = highcost) +{ +mode = amode - 1; +COPY4_IF_LT(acost, lowcost, amode, lowmode, asad, lowsad, abits, lowbits); +} +else +{ +mode = amode + 1; +COPY4_IF_LT(acost, highcost, amode, highmode, asad, highsad, abits, highbits); +} +modeHor = (mode 18); +cmp = (modeHor ? buf_trans : fenc); +srcStride = (modeHor ? scaleTuSize : scaleStride); +sad = sa8d(cmp, srcStride, tmp[(mode - 2) * predsize], scaleTuSize) costShift; bits = !(mpms ((uint64_t)1 mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth); cost = m_rdCost.calcRdSADCost(sad, bits); -COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits); +COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits); +COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits); } - +else // calculate and search all intra prediction angles for lowest cost +{ +for (mode = 2; mode 35; mode++) +{ +modeHor = (mode 18); +cmp = (modeHor ? buf_trans : fenc); +srcStride = (modeHor ? 
scaleTuSize : scaleStride); +sad = sa8d(cmp, srcStride, tmp[(mode - 2) * predsize], scaleTuSize) costShift; +bits = !(mpms ((uint64_t)1 mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth); +cost = m_rdCost.calcRdSADCost(sad, bits); +COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits); +} +} cu-m_totalBits = bbits; cu-m_totalDistortion = bsad;
Re: [x265] [PATCH] Added fast intra search option
There are a couple of warnings our regression tests caught with this. Can you take a look? source\encoder\predict.cpp(78): warning C4800: 'const unsigned char' : forcing value to bool 'true' or 'false' (performance warning) (IntraFilterType can be bool, I think?). C:\users\deepthi\code\x265\source\encoder\slicetype.cpp(1714): warning C4701: potentially uninitialized local variable 'lowmode' used Thanks, Deepthi On Wed, Aug 13, 2014 at 4:07 AM, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { 
m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? left1 : left0; -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost) +{ +lowmode = mode; +acost = cost; +} +} +mode = lowmode - 2; +if (mode 18) +lowcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +lowcost = satd(m_me.fenc, 
FENC_STRIDE, m_predictions[mode * predsize], cuSize); +highcost = m_me.COST_MAX; +if (lowmode 34) +{ +mode = lowmode + 2; +if (mode 18)
Re: [x265] [PATCH] Added fast intra search option
Building with gcc (Debian 4.7.2-5), I get no warnings. On 08/13/2014 05:46 AM, Deepthi Nandakumar wrote: There are a couple of warnings our regression tests caught with this. Can you take a look? source\encoder\predict.cpp(78): warning C4800: 'const unsigned char' : forcing value to bool 'true' or 'false' (performance warning) (IntraFilterType can be bool, I think?). Initially I used bool for the table but unsigned char performed better on my old system. C:\users\deepthi\code\x265\source\encoder\slicetype.cpp(1714): warning C4701: potentially uninitialized local variable 'lowmode' used I'll submit a patch to set lowmode to a default. Thanks, Deepthi On Wed, Aug 13, 2014 at 4:07 AM, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, 
p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? 
left1 : left0; - primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); + primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost)
Re: [x265] [PATCH] Added fast intra search option
On 08/12/2014 10:22 PM, Steve Borho wrote: On 08/12, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd on my newer CPUs, this version was unambiguously faster; so I've pushed this version, thanks. How were you testing it? I was encoding a 2 minute video with -I 1. I also have a patch that changes m_predictions from an EstimateRow member pointer to enough dynamically allocated memory to hold all 35 predictions, to a local array of EstimateRow::estimateCUCost big enough to hold one prediction, which can't be used with allangs and so would only be useful with the other fast-intra version. Again, it didn't seem to help much on my system, but if you would like to try it I'll submit a patch. diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 
source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? left1 : left0; -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost) 
+{ +lowmode = mode; +acost = cost; +} +} +mode = lowmode - 2; +if (mode 18) +lowcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +lowcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +
[x265] [PATCH] Added Fast intra search option
# HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407881349 25200 # Node ID 1e079a117f0f381c97753d74404a6a943ab3ff1d # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added Fast intra search option This version calls intra_pred and satd for each mode searched and also uses the IntraFilterType table that was moved from intrapred.cpp to predict.h diff -r 8a7f4bb1d1be -r 1e079a117f0f source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:09:09 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 1e079a117f0f source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:09:09 2014 -0700 @@ -33,6 +33,7 @@ #include slicetype.h #include motion.h #include ratecontrol.h +#include predict.h #define NUM_CUS (m_widthInCU 2 m_heightInCU 2 ? 
(m_widthInCU - 2) * (m_heightInCU - 2) : m_widthInCU * m_heightInCU) @@ -1242,6 +1243,7 @@ { m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1675,27 +1677,89 @@ } int predsize = cuSize * cuSize; +int icost = m_me.COST_MAX, cost; +pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -// generate 35 intra predictions into tmp +// generate intra predictions into m_predictions primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? left1 : left0; -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); -primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; +uint32_t lowmode, mode; +// fast intra prediction angle search +if (m_param-bEnableFastIntra) +{ +int acost = m_me.COST_MAX; +for (mode = 4;mode 35; mode += 5) +{ +left = (IntraFilterType[sizeIdx][mode] ? left1 : left0); +above = (IntraFilterType[sizeIdx][mode] ? above1 : above0); +primitives.intra_pred[sizeIdx][mode](m_predictions, cuSize, left, above, mode, cuSize = 16); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost acost) +{ +lowmode = mode; +acost = cost; +} +} +mode = lowmode - 2; +left = (IntraFilterType[sizeIdx][mode] ? left1 : left0); +above = (IntraFilterType[sizeIdx][mode] ? 
above1 : above0); +primitives.intra_pred[sizeIdx][mode](m_predictions, cuSize, left, above, mode, cuSize = 16); +int lowcost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +int highcost = m_me.COST_MAX; +if (lowmode 34) +{ +mode = lowmode + 2; +left = (IntraFilterType[sizeIdx][mode] ? left1 : left0); +above = (IntraFilterType[sizeIdx][mode] ? above1 : above0); +primitives.intra_pred[sizeIdx][mode](m_predictions, cuSize, left, above, mode, cuSize = 16); +highcost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +} +if (lowcost = highcost) +{ +mode = lowmode - 1; +left = (IntraFilterType[sizeIdx][mode] ? left1 : left0); +above =
[x265] [PATCH] Added fast intra search option
# HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; 
primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? left1 : left0; -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost) +{ +lowmode = mode; +acost = cost; +} +} +mode = lowmode - 2; +if (mode 18) +lowcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +lowcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +highcost = m_me.COST_MAX; +if (lowmode 34) +{ +mode = lowmode + 2; +if (mode 18) +highcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +highcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +} +if (lowcost = highcost) +{ +mode = lowmode - 1; +if (lowcost acost) +acost = lowcost; +} +else +{ +mode = lowmode + 1; +if (highcost acost) +acost = highcost; +
Re: [x265] [PATCH] Added fast intra search option
On 08/12, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd on my newer CPUs, this version was unambiguously faster; so I've pushed this version, thanks. diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cppTue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cppTue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = 
primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? left1 : left0; -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost) +{ +lowmode = mode; +acost = cost; +} +} +mode = lowmode - 2; +if (mode 18) +lowcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +lowcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +highcost = m_me.COST_MAX; +if (lowmode 34) +{ +mode = lowmode + 2; +if (mode 18) +highcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +highcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +} +if 
(lowcost = highcost) +{ +mode = lowmode - 1;