Re: [x265] [PATCH] Added fast intra search option
There are a couple of warnings our regression tests caught with this. Can you take a look? source\encoder\predict.cpp(78): warning C4800: 'const unsigned char' : forcing value to bool 'true' or 'false' (performance warning) (IntraFilterType can be bool, I think?). C:\users\deepthi\code\x265\source\encoder\slicetype.cpp(1714): warning C4701: potentially uninitialized local variable 'lowmode' used Thanks, Deepthi On Wed, Aug 13, 2014 at 4:07 AM, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { 
m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? left1 : left0; -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost) +{ +lowmode = mode; +acost = cost; +} +} +mode = lowmode - 2; +if (mode 18) +lowcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +lowcost = satd(m_me.fenc, 
FENC_STRIDE, m_predictions[mode * predsize], cuSize); +highcost = m_me.COST_MAX; +if (lowmode 34) +{ +mode = lowmode + 2; +if (mode 18)
Re: [x265] [PATCH] Added fast intra search option
Building with gcc (Debian 4.7.2-5), I get no warnings. On 08/13/2014 05:46 AM, Deepthi Nandakumar wrote: There are a couple of warnings our regression tests caught with this. Can you take a look? source\encoder\predict.cpp(78): warning C4800: 'const unsigned char' : forcing value to bool 'true' or 'false' (performance warning) (IntraFilterType can be bool, I think?). Initially I used bool for the table, but unsigned char performed better on my old system. C:\users\deepthi\code\x265\source\encoder\slicetype.cpp(1714): warning C4701: potentially uninitialized local variable 'lowmode' used I'll submit a patch to initialize lowmode to a default value. Thanks, Deepthi On Wed, Aug 13, 2014 at 4:07 AM, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, 
p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? 
left1 : left0; - primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); + primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost)
Re: [x265] [PATCH] Added fast intra search option
On 08/12/2014 10:22 PM, Steve Borho wrote: On 08/12, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd. On my newer CPUs, this version was unambiguously faster, so I've pushed this version, thanks. How were you testing it? I was encoding a 2 minute video with -I 1. I also have a patch that changes m_predictions from an EstimateRow member pointer (pointing to enough dynamically allocated memory to hold all 35 predictions) to a local array in EstimateRow::estimateCUCost that is just big enough to hold one prediction. That can't be used with allangs, so it would only be useful with the other fast-intra version. Again, it didn't seem to help much on my system, but if you would like to try it I'll submit a patch. diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 
source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? left1 : left0; -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost) 
+{ +lowmode = mode; +acost = cost; +} +} +mode = lowmode - 2; +if (mode 18) +lowcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +lowcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +
Re: [x265] [PATCH] Added fast intra search option
On 08/12, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd. On my newer CPUs, this version was unambiguously faster, so I've pushed this version, thanks. diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = 
primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? left1 : left0; -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost) +{ +lowmode = mode; +acost = cost; +} +} +mode = lowmode - 2; +if (mode 18) +lowcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +lowcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +highcost = m_me.COST_MAX; +if (lowmode 34) +{ +mode = lowmode + 2; +if (mode 18) +highcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +highcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +} +if 
(lowcost = highcost) +{ +mode = lowmode - 1;