Re: [x265] [PATCH] Added fast intra search option

2014-08-13 Thread Deepthi Nandakumar
There are a couple of warnings our regression tests caught with this. Can
you take a look?

source\encoder\predict.cpp(78): warning C4800: 'const unsigned char' :
forcing value to bool 'true' or 'false' (performance warning)
(IntraFilterType can be bool, I think?).


C:\users\deepthi\code\x265\source\encoder\slicetype.cpp(1714): warning
C4701: potentially uninitialized local variable 'lowmode' used

Thanks,
Deepthi



On Wed, Aug 13, 2014 at 4:07 AM, dtyx...@gmail.com wrote:

 # HG changeset patch
 # User David T Yuen dtyx...@gmail.com
 # Date 1407882999 25200
 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5
 # Parent  8a7f4bb1d1be32fe668d410450c2e320ccae6098
 Added fast intra search option

 This version calls intra_pred_allangs  to create the predictions then the
 faster search with satd

 diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp
 --- a/source/common/param.cpp   Tue Aug 12 01:11:39 2014 -0500
 +++ b/source/common/param.cpp   Tue Aug 12 15:36:39 2014 -0700
 @@ -132,6 +132,7 @@
  /* Intra Coding Tools */
  param-bEnableConstrainedIntra = 0;
  param-bEnableStrongIntraSmoothing = 1;
 +param-bEnableFastIntra = 0;

  /* Inter Coding tools */
  param-searchMethod = X265_HEX_SEARCH;
 @@ -560,6 +561,7 @@
  OPT(lossless) p-bLossless = atobool(value);
  OPT(cu-lossless) p-bCULossless = atobool(value);
  OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value);
 +OPT(fast-intra) p-bEnableFastIntra = atobool(value);
  OPT(open-gop) p-bOpenGOP = atobool(value);
  OPT(scenecut)
  {
 @@ -1211,6 +1213,7 @@
  BOOL(p-bLossless, lossless);
  BOOL(p-bCULossless, cu-lossless);
  BOOL(p-bEnableConstrainedIntra, constrained-intra);
 +BOOL(p-bEnableFastIntra, fast-intra);
  BOOL(p-bOpenGOP, open-gop);
  s += sprintf(s,  interlace=%d, p-interlaceMode);
  s += sprintf(s,  keyint=%d, p-keyframeMax);
 diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp
 --- a/source/encoder/slicetype.cpp  Tue Aug 12 01:11:39 2014 -0500
 +++ b/source/encoder/slicetype.cpp  Tue Aug 12 15:36:39 2014 -0700
 @@ -1242,6 +1242,7 @@
  {
  m_rows[i].m_widthInCU = m_widthInCU;
  m_rows[i].m_heightInCU = m_heightInCU;
 +m_rows[i].m_param = m_param;
  }

  if (!WaveFront::init(m_heightInCU))
 @@ -1676,26 +1677,86 @@

  int predsize = cuSize * cuSize;

 -// generate 35 intra predictions into tmp
 +// generate 35 intra predictions into m_predictions
 +pixelcmp_t satd =
 primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
 +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost =
 m_me.COST_MAX;
 +uint32_t  lowmode, mode;
  primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize,
 left0, above0, 0, (cuSize = 16));
 +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
 +if (cost  icost)
 +icost = cost;
  pixel *above = (cuSize = 8) ? above1 : above0;
  pixel *left  = (cuSize = 8) ? left1 : left0;
 -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions +
 predsize, cuSize, left, above, 0, 0);
 +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize,
 left, above, 0, 0);
 +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
 +if (cost  icost)
 +icost = cost;
  primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 *
 predsize, above0, left0, above1, left1, (cuSize = 16));

 -// calculate 35 satd costs, keep least cost
 +// calculate satd costs, keep least cost
  ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
  primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE);
 -pixelcmp_t satd =
 primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
 -int icost = m_me.COST_MAX, cost;
 -for (uint32_t mode = 0; mode  35; mode++)
 +// fast-intra angle search
 +if (m_param-bEnableFastIntra)
  {
 -if ((mode = 2)  (mode  18))
 +for (mode = 4;mode  35; mode += 5)
 +{
 +if (mode  18)
 +cost = satd(buf_trans, cuSize, m_predictions[mode *
 predsize], cuSize);
 +else
 +cost = satd(m_me.fenc, FENC_STRIDE,
 m_predictions[mode * predsize], cuSize);
 +if (cost  acost)
 +{
 +lowmode = mode;
 +acost = cost;
 +}
 +}
 +mode = lowmode - 2;
 +if (mode  18)
 +lowcost = satd(buf_trans, cuSize, m_predictions[mode *
 predsize], cuSize);
 +else
 +lowcost = satd(m_me.fenc, FENC_STRIDE,
 m_predictions[mode * predsize], cuSize);
 +highcost = m_me.COST_MAX;
 +if (lowmode  34)
 +{
 +mode = lowmode + 2;
 +if (mode  18)

Re: [x265] [PATCH] Added fast intra search option

2014-08-13 Thread dave

Building with gcc (Debian 4.7.2-5), I get no warnings.
On 08/13/2014 05:46 AM, Deepthi Nandakumar wrote:
There are a couple of warnings our regression tests caught with this. 
Can you take a look?


source\encoder\predict.cpp(78): warning C4800: 'const unsigned char' : 
forcing value to bool 'true' or 'false' (performance warning)

(IntraFilterType can be bool, I think?).

Initially I used bool for the table but unsigned char performed better 
on my old system.


C:\users\deepthi\code\x265\source\encoder\slicetype.cpp(1714): warning 
C4701: potentially uninitialized local variable 'lowmode' used



I'll submit a patch to set lowmode to a default.

Thanks,
Deepthi



On Wed, Aug 13, 2014 at 4:07 AM, dtyx...@gmail.com wrote:


# HG changeset patch
# User David T Yuen dtyx...@gmail.com mailto:dtyx...@gmail.com
# Date 1407882999 25200
# Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5
# Parent  8a7f4bb1d1be32fe668d410450c2e320ccae6098
Added fast intra search option

This version calls intra_pred_allangs  to create the predictions
then the faster search with satd

diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp
--- a/source/common/param.cpp   Tue Aug 12 01:11:39 2014 -0500
+++ b/source/common/param.cpp   Tue Aug 12 15:36:39 2014 -0700
@@ -132,6 +132,7 @@
 /* Intra Coding Tools */
 param-bEnableConstrainedIntra = 0;
 param-bEnableStrongIntraSmoothing = 1;
+param-bEnableFastIntra = 0;

 /* Inter Coding tools */
 param-searchMethod = X265_HEX_SEARCH;
@@ -560,6 +561,7 @@
 OPT(lossless) p-bLossless = atobool(value);
 OPT(cu-lossless) p-bCULossless = atobool(value);
 OPT(constrained-intra) p-bEnableConstrainedIntra =
atobool(value);
+OPT(fast-intra) p-bEnableFastIntra = atobool(value);
 OPT(open-gop) p-bOpenGOP = atobool(value);
 OPT(scenecut)
 {
@@ -1211,6 +1213,7 @@
 BOOL(p-bLossless, lossless);
 BOOL(p-bCULossless, cu-lossless);
 BOOL(p-bEnableConstrainedIntra, constrained-intra);
+BOOL(p-bEnableFastIntra, fast-intra);
 BOOL(p-bOpenGOP, open-gop);
 s += sprintf(s,  interlace=%d, p-interlaceMode);
 s += sprintf(s,  keyint=%d, p-keyframeMax);
diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp  Tue Aug 12 01:11:39 2014 -0500
+++ b/source/encoder/slicetype.cpp  Tue Aug 12 15:36:39 2014 -0700
@@ -1242,6 +1242,7 @@
 {
 m_rows[i].m_widthInCU = m_widthInCU;
 m_rows[i].m_heightInCU = m_heightInCU;
+m_rows[i].m_param = m_param;
 }

 if (!WaveFront::init(m_heightInCU))
@@ -1676,26 +1677,86 @@

 int predsize = cuSize * cuSize;

-// generate 35 intra predictions into tmp
+// generate 35 intra predictions into m_predictions
+pixelcmp_t satd =
primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
+int icost = m_me.COST_MAX, cost, highcost, lowcost, acost
= m_me.COST_MAX;
+uint32_t  lowmode, mode;
 primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize,
left0, above0, 0, (cuSize = 16));
+cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
+if (cost  icost)
+icost = cost;
 pixel *above = (cuSize = 8) ? above1 : above0;
 pixel *left  = (cuSize = 8) ? left1 : left0;
-  primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions +
predsize, cuSize, left, above, 0, 0);
+  primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions,
cuSize, left, above, 0, 0);
+cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
+if (cost  icost)
+icost = cost;
 primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 *
predsize, above0, left0, above1, left1, (cuSize = 16));

-// calculate 35 satd costs, keep least cost
+// calculate satd costs, keep least cost
 ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
 primitives.transpose[sizeIdx](buf_trans, m_me.fenc,
FENC_STRIDE);
-pixelcmp_t satd =
primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
-int icost = m_me.COST_MAX, cost;
-for (uint32_t mode = 0; mode  35; mode++)
+// fast-intra angle search
+if (m_param-bEnableFastIntra)
 {
-if ((mode = 2)  (mode  18))
+for (mode = 4;mode  35; mode += 5)
+{
+if (mode  18)
+cost = satd(buf_trans, cuSize,
m_predictions[mode * predsize], cuSize);
+else
+cost = satd(m_me.fenc, FENC_STRIDE,
m_predictions[mode * predsize], cuSize);
+if (cost  acost)

Re: [x265] [PATCH] Added fast intra search option

2014-08-13 Thread dave

On 08/12/2014 10:22 PM, Steve Borho wrote:

On 08/12, dtyx...@gmail.com wrote:

# HG changeset patch
# User David T Yuen dtyx...@gmail.com
# Date 1407882999 25200
# Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5
# Parent  8a7f4bb1d1be32fe668d410450c2e320ccae6098
Added fast intra search option

This version calls intra_pred_allangs  to create the predictions then the 
faster search with satd

on my newer CPUs, this version was unambiguously faster; so I've pushed
this version, thanks.

How were you testing it?  I was encoding a 2 minute video with -I 1.

I also have a patch that changes m_predictions from an EstimateRow member 
pointer (to enough dynamically allocated memory to hold all 35 
predictions) into a local array in EstimateRow::estimateCUCost that is 
just big enough to hold one prediction. That can't be used with allangs, 
so it would only be useful with the other fast-intra version.  Again, it 
didn't seem to help much on my system, but if you would like to try it 
I'll submit a patch.



diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp
--- a/source/common/param.cpp   Tue Aug 12 01:11:39 2014 -0500
+++ b/source/common/param.cpp   Tue Aug 12 15:36:39 2014 -0700
@@ -132,6 +132,7 @@
  /* Intra Coding Tools */
  param-bEnableConstrainedIntra = 0;
  param-bEnableStrongIntraSmoothing = 1;
+param-bEnableFastIntra = 0;
  
  /* Inter Coding tools */

  param-searchMethod = X265_HEX_SEARCH;
@@ -560,6 +561,7 @@
  OPT(lossless) p-bLossless = atobool(value);
  OPT(cu-lossless) p-bCULossless = atobool(value);
  OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value);
+OPT(fast-intra) p-bEnableFastIntra = atobool(value);
  OPT(open-gop) p-bOpenGOP = atobool(value);
  OPT(scenecut)
  {
@@ -1211,6 +1213,7 @@
  BOOL(p-bLossless, lossless);
  BOOL(p-bCULossless, cu-lossless);
  BOOL(p-bEnableConstrainedIntra, constrained-intra);
+BOOL(p-bEnableFastIntra, fast-intra);
  BOOL(p-bOpenGOP, open-gop);
  s += sprintf(s,  interlace=%d, p-interlaceMode);
  s += sprintf(s,  keyint=%d, p-keyframeMax);
diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp  Tue Aug 12 01:11:39 2014 -0500
+++ b/source/encoder/slicetype.cpp  Tue Aug 12 15:36:39 2014 -0700
@@ -1242,6 +1242,7 @@
  {
  m_rows[i].m_widthInCU = m_widthInCU;
  m_rows[i].m_heightInCU = m_heightInCU;
+m_rows[i].m_param = m_param;
  }
  
  if (!WaveFront::init(m_heightInCU))

@@ -1676,26 +1677,86 @@
  
  int predsize = cuSize * cuSize;
  
-// generate 35 intra predictions into tmp

+// generate 35 intra predictions into m_predictions
+pixelcmp_t satd = 
primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
+int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = 
m_me.COST_MAX;
+uint32_t  lowmode, mode;
  primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, 
above0, 0, (cuSize = 16));
+cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
+if (cost  icost)
+icost = cost;
  pixel *above = (cuSize = 8) ? above1 : above0;
  pixel *left  = (cuSize = 8) ? left1 : left0;
-primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, 
cuSize, left, above, 0, 0);
+primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, 
left, above, 0, 0);
+cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
+if (cost  icost)
+icost = cost;
  primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, 
above0, left0, above1, left1, (cuSize = 16));
  
-// calculate 35 satd costs, keep least cost

+// calculate satd costs, keep least cost
  ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
  primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE);
-pixelcmp_t satd = 
primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
-int icost = m_me.COST_MAX, cost;
-for (uint32_t mode = 0; mode  35; mode++)
+// fast-intra angle search
+if (m_param-bEnableFastIntra)
  {
-if ((mode = 2)  (mode  18))
+for (mode = 4;mode  35; mode += 5)
+{
+if (mode  18)
+cost = satd(buf_trans, cuSize, m_predictions[mode * 
predsize], cuSize);
+else
+cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * 
predsize], cuSize);
+if (cost  acost)
+{
+lowmode = mode;
+acost = cost;
+}
+}
+mode = lowmode - 2;
+if (mode  18)
+lowcost = satd(buf_trans, cuSize, m_predictions[mode * 
predsize], cuSize);
+else
+lowcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * 
predsize], cuSize);
+

Re: [x265] [PATCH] Added fast intra search option

2014-08-12 Thread Steve Borho
On 08/12, dtyx...@gmail.com wrote:
 # HG changeset patch
 # User David T Yuen dtyx...@gmail.com
 # Date 1407882999 25200
 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5
 # Parent  8a7f4bb1d1be32fe668d410450c2e320ccae6098
 Added fast intra search option
 
 This version calls intra_pred_allangs  to create the predictions then the 
 faster search with satd

on my newer CPUs, this version was unambiguously faster; so I've pushed
this version, thanks.

 diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp
 --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500
 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700
 @@ -132,6 +132,7 @@
  /* Intra Coding Tools */
  param-bEnableConstrainedIntra = 0;
  param-bEnableStrongIntraSmoothing = 1;
 +param-bEnableFastIntra = 0;
  
  /* Inter Coding tools */
  param-searchMethod = X265_HEX_SEARCH;
 @@ -560,6 +561,7 @@
  OPT(lossless) p-bLossless = atobool(value);
  OPT(cu-lossless) p-bCULossless = atobool(value);
  OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value);
 +OPT(fast-intra) p-bEnableFastIntra = atobool(value);
  OPT(open-gop) p-bOpenGOP = atobool(value);
  OPT(scenecut)
  {
 @@ -1211,6 +1213,7 @@
  BOOL(p-bLossless, lossless);
  BOOL(p-bCULossless, cu-lossless);
  BOOL(p-bEnableConstrainedIntra, constrained-intra);
 +BOOL(p-bEnableFastIntra, fast-intra);
  BOOL(p-bOpenGOP, open-gop);
  s += sprintf(s,  interlace=%d, p-interlaceMode);
  s += sprintf(s,  keyint=%d, p-keyframeMax);
 diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp
 --- a/source/encoder/slicetype.cppTue Aug 12 01:11:39 2014 -0500
 +++ b/source/encoder/slicetype.cppTue Aug 12 15:36:39 2014 -0700
 @@ -1242,6 +1242,7 @@
  {
  m_rows[i].m_widthInCU = m_widthInCU;
  m_rows[i].m_heightInCU = m_heightInCU;
 +m_rows[i].m_param = m_param;
  }
  
  if (!WaveFront::init(m_heightInCU))
 @@ -1676,26 +1677,86 @@
  
  int predsize = cuSize * cuSize;
  
 -// generate 35 intra predictions into tmp
 +// generate 35 intra predictions into m_predictions
 +pixelcmp_t satd = 
 primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
 +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = 
 m_me.COST_MAX;
 +uint32_t  lowmode, mode;
  primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, 
 above0, 0, (cuSize = 16));
 +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
 +if (cost  icost)
 +icost = cost;
  pixel *above = (cuSize = 8) ? above1 : above0;
  pixel *left  = (cuSize = 8) ? left1 : left0;
 -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, 
 cuSize, left, above, 0, 0);
 +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, 
 left, above, 0, 0);
 +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
 +if (cost  icost)
 +icost = cost;
  primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, 
 above0, left0, above1, left1, (cuSize = 16));
  
 -// calculate 35 satd costs, keep least cost
 +// calculate satd costs, keep least cost
  ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
  primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE);
 -pixelcmp_t satd = 
 primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
 -int icost = m_me.COST_MAX, cost;
 -for (uint32_t mode = 0; mode  35; mode++)
 +// fast-intra angle search
 +if (m_param-bEnableFastIntra)
  {
 -if ((mode = 2)  (mode  18))
 +for (mode = 4;mode  35; mode += 5)
 +{
 +if (mode  18)
 +cost = satd(buf_trans, cuSize, m_predictions[mode * 
 predsize], cuSize);
 +else
 +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode 
 * predsize], cuSize);
 +if (cost  acost)
 +{
 +lowmode = mode;
 +acost = cost;
 +}
 +}
 +mode = lowmode - 2;
 +if (mode  18)
 +lowcost = satd(buf_trans, cuSize, m_predictions[mode * 
 predsize], cuSize);
 +else
 +lowcost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * 
 predsize], cuSize);
 +highcost = m_me.COST_MAX;
 +if (lowmode  34)
 +{
 +mode = lowmode + 2;
 +if (mode  18)
 +highcost = satd(buf_trans, cuSize, m_predictions[mode * 
 predsize], cuSize);
 +else
 +highcost = satd(m_me.fenc, FENC_STRIDE, 
 m_predictions[mode * predsize], cuSize);
 +}
 +if (lowcost = highcost)
 +{
 +mode = lowmode - 1;