Re: [x265] [PATCH 2 of 2 V2] framefilter: comment cleanups, use pixel data type

2013-09-15 Thread Deepthi Nandakumar
On Fri, Sep 13, 2013 at 11:11 PM, Steve Borho st...@borho.org wrote:

 # HG changeset patch
 # User Steve Borho st...@borho.org
 # Date 1379053732 18000
 #  Fri Sep 13 01:28:52 2013 -0500
 # Node ID b8bb66cd21bcab6505b7fe321e95875861c84bda
 # Parent  2614338b90d3533c2760a94fa10ffb5dee57910c
 framefilter: comment cleanups, use pixel data type

 diff -r 2614338b90d3 -r b8bb66cd21bc source/encoder/frameencoder.cpp
 --- a/source/encoder/frameencoder.cpp   Fri Sep 13 10:55:03 2013 -0500
 +++ b/source/encoder/frameencoder.cpp   Fri Sep 13 01:28:52 2013 -0500
 @@ -937,14 +937,13 @@
  return;
  }
  }
 +// this row of CTUs has been encoded

 -// Active Loopfilter
 +// Run row-wise loop filters
  if (row = m_filterRowDelay)
  {
  m_frameFilter.processRow(row - m_filterRowDelay);
  }
 -
 -// this row of CTUs has been encoded
  if (row == m_numRows - 1)
  {
  for(int i = m_numRows - m_filterRowDelay; i  m_numRows; i++)
 diff -r 2614338b90d3 -r b8bb66cd21bc source/encoder/framefilter.cpp
 --- a/source/encoder/framefilter.cppFri Sep 13 10:55:03 2013 -0500
 +++ b/source/encoder/framefilter.cppFri Sep 13 01:28:52 2013 -0500
 @@ -170,7 +170,7 @@
  m_sao.processSaoUnitRow(saoParam-saoLcuParam[2], row -
 1, 2);
  }

 -// TODO: this code is NOT VERIFY because TransformSkip and
 PCM mode have some bugs, they always not active!
 +// TODO: this code is NOT VERIFIED because TransformSkip and
 PCM modes have some bugs, they are never enabled
  bool  bPCMFilter = (m_pic-getSlice()-getSPS()-getUsePCM()
  m_pic-getSlice()-getSPS()-getPCMFilterDisableFlag()) ? true : false;
  if (bPCMFilter ||
 m_pic-getSlice()-getPPS()-getTransquantBypassEnableFlag())
  {
 @@ -187,8 +187,6 @@

  // this row of CTUs has been encoded

 -// TODO: extend margins for motion reference
 -
  if (row  0)
  {
  processRowPost(row - 1);
 @@ -209,7 +207,7 @@
  m_sao.processSaoUnitRow(saoParam-saoLcuParam[2], row, 2);
  }

 -// TODO: this code is NOT VERIFY because TransformSkip and PCM
 mode have some bugs, they always not active!
 +// TODO: this code is NOT VERIFIED because TransformSkip and PCM
 modes have some bugs, they are never enabled
  bool  bPCMFilter = (m_pic-getSlice()-getSPS()-getUsePCM() 
 m_pic-getSlice()-getSPS()-getPCMFilterDisableFlag()) ? true : false;
  if (bPCMFilter ||
 m_pic-getSlice()-getPPS()-getTransquantBypassEnableFlag())
  {
 @@ -234,10 +232,6 @@
  const int lastH = ((recon-getHeight() % g_maxCUHeight) ?
 (recon-getHeight() % g_maxCUHeight) : g_maxCUHeight);
  const int realH = (row != m_numRows - 1) ? g_maxCUHeight : lastH;

 -// TODO: Remove when we confirm below code is right
 -//recon-xExtendPicCompBorder(recon-getLumaAddr(),
 recon-getStride(), recon-getWidth(), recon-getHeight(),
 recon-m_lumaMarginX, recon-m_lumaMarginY);
 -//recon-xExtendPicCompBorder(recon-getCbAddr(),
 recon-getCStride(), recon-getWidth()  1, recon-getHeight()  1,
 recon-m_chromaMarginX, recon-m_chromaMarginY);
 -//recon-xExtendPicCompBorder(recon-getCrAddr(),
 recon-getCStride(), recon-getWidth()  1, recon-getHeight()  1,
 recon-m_chromaMarginX, recon-m_chromaMarginY);
  // Border extend Left and Right
  primitives.extendRowBorder(recon-getLumaAddr(lineStartCUAddr),
 recon-getStride(), recon-getWidth(), realH, recon-getLumaMarginX());
  primitives.extendRowBorder(recon-getCbAddr(lineStartCUAddr),
 recon-getCStride(), recon-getWidth()  1, realH  1,
 recon-getChromaMarginX());
 @@ -248,9 +242,9 @@
  {
  const intptr_t stride = recon-getStride();
  const intptr_t strideC = recon-getCStride();
 -Pel *pixY = recon-getLumaAddr(lineStartCUAddr) -
 recon-getLumaMarginX();
 -Pel *pixU = recon-getCbAddr(lineStartCUAddr) -
 recon-getChromaMarginX();
 -Pel *pixV = recon-getCrAddr(lineStartCUAddr) -
 recon-getChromaMarginX();
 +pixel *pixY = recon-getLumaAddr(lineStartCUAddr) -
 recon-getLumaMarginX();
 +pixel *pixU = recon-getCbAddr(lineStartCUAddr) -
 recon-getChromaMarginX();
 +pixel *pixV = recon-getCrAddr(lineStartCUAddr) -
 recon-getChromaMarginX();


Not sure why Pel has been changed to pixel (dropping 16-bit support
altogether?), since getLuma/Cb/CrAddr still return Pel.
Anyway, the sizeof operand in the following memcpy calls should also be
changed to pixel.



  for (int y = 0; y  recon-getLumaMarginY(); y++)
  {
 @@ -269,9 +263,9 @@
  {
  const intptr_t stride = recon-getStride();
  const intptr_t strideC = recon-getCStride();
 -Pel *pixY = recon-getLumaAddr(lineStartCUAddr) -
 recon-getLumaMarginX() + (realH - 1) * stride;
 -Pel *pixU = recon-getCbAddr(lineStartCUAddr) -
 recon-getChromaMarginX() + ((realH  1) - 1) * strideC;
 -Pel *pixV = 

Re: [x265] [PATCH]RDLevel: Disable RDOQTS when RDO and/or TS are disabled.

2013-09-16 Thread Deepthi Nandakumar
Agreed. Resending the patch, along with a few cleanups to make it easier to
track all elements in the param struct.


On Mon, Sep 16, 2013 at 3:27 PM, Derek Buitenhuis 
derek.buitenh...@gmail.com wrote:

 On Mon, Sep 16, 2013 at 10:47 AM, Deepthi Nandakumar
 deep...@multicorewareinc.com wrote:

  Yes, this particular param flag is initialised to 1 (highest quality
  setting) in x265_param_default. I'm setting it to zero for a certain set
 of
  user defined parameters.

 What's the point of the first check
 (
 http://mailman.videolan.org/pipermail/x265-devel/2013-September/000783.html
 )
 which sets it to 1 then?

 - Derek
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH v2]: CLI: remove CLI option rdoqts; cleanup

2013-09-16 Thread Deepthi Nandakumar
Thanks for pointing that out, that was indeed unintentional. Pushed the
fix.


On Mon, Sep 16, 2013 at 9:11 PM, Derek Buitenhuis 
derek.buitenh...@gmail.com wrote:

 On Mon, Sep 16, 2013 at 1:30 PM, Deepthi Nandakumar
 deep...@multicorewareinc.com wrote:
  # HG changeset patch
  # User Deepthi Nandakumar deep...@multicorewareinc.com
  # Date 1379334518 -19800
  # Node ID 46b065f7d676e7ff26c46a40f1790bdae290d7fa
  # Parent  881444f5910b2b0e0f286a6ca47fcc743515cbb2
  CLI options: Eliminate rdoqts option; cleanup
 
  1. Eliminate rdoqts CLI option: enabled when rdoq and ts are both
 enabled.
  2. Rearrange default initialisations in x265_param_ t structure
 
  diff -r 881444f5910b -r 46b065f7d676 source/common/common.cpp
  --- a/source/common/common.cppMon Sep 16 09:41:34 2013 +0530
  +++ b/source/common/common.cppMon Sep 16 17:58:38 2013 +0530
  @@ -115,45 +115,58 @@
   va_end(arg);
   }
 
  -extern C
   void x265_param_default(x265_param_t *param)

 This looks incorrect. The function needs to be exported and to be able
 to be called
 from C.


  diff -r 881444f5910b -r 46b065f7d676 source/encoder/encoder.cpp
  --- a/source/encoder/encoder.cppMon Sep 16 09:41:34 2013 +0530
  +++ b/source/encoder/encoder.cppMon Sep 16 17:58:38 2013 +0530
  @@ -219,6 +219,11 @@
   _param-rc.rateControlMode = X265_RC_ABR;
   }
 
  +if(!(_param-bEnableRDOQ  _param-bEnableTransformSkip))
  +{
  +_param-bEnableRDOQTS = 0;
  +}

 Please add a note in the commit message about this.

 Rest is OK.

 - Derek
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] do not use std;:list for the class AccessUnit

2013-09-19 Thread Deepthi Nandakumar
FrameEncoder and compressFrame need to be refactored so that mallocs and
failures are handled correctly.


On Thu, Sep 19, 2013 at 1:31 PM, Gopu Govindaswamy 
g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1379577682 -19800
 # Node ID 92e1ac03b081eccd8bc797142cc22033c87d475d
 # Parent  26d6f155f8df69147f40f4945d99c29a52988c56
 do not use std;:list for the class AccessUnit

 Removed std::list from encoder and nalunits

 diff -r 26d6f155f8df -r 92e1ac03b081 source/Lib/TLibCommon/NAL.h
 --- a/source/Lib/TLibCommon/NAL.h   Wed Sep 18 16:13:33 2013 -0500
 +++ b/source/Lib/TLibCommon/NAL.h   Thu Sep 19 13:31:22 2013 +0530
 @@ -114,6 +114,7 @@
   * emulation_prevention_three_byte symbols.
   */
  NALUnitEBSP(OutputNALUnit nalu);
 +void init(OutputNALUnit nalu);
  };
  }
  //! \}
 diff -r 26d6f155f8df -r 92e1ac03b081 source/Lib/TLibEncoder/NALwrite.h
 --- a/source/Lib/TLibEncoder/NALwrite.h Wed Sep 18 16:13:33 2013 -0500
 +++ b/source/Lib/TLibEncoder/NALwrite.h Thu Sep 19 13:31:22 2013 +0530
 @@ -83,6 +83,14 @@
  write(m_nalUnitData, nalu, m_packetSize);
  }

 +inline void NALUnitEBSP::init(OutputNALUnit nalu)
 +{
 +m_nalUnitType = nalu.m_nalUnitType;
 +m_temporalId = nalu.m_temporalId;
 +m_reservedZero6Bits = nalu.m_reservedZero6Bits;
 +write(m_nalUnitData, nalu, m_packetSize);
 +}
 +
  void copyNaluData(OutputNALUnit naluDest, const OutputNALUnit naluSrc);
  }

 diff -r 26d6f155f8df -r 92e1ac03b081 source/Lib/TLibEncoder/TEncTop.cpp
 --- a/source/Lib/TLibEncoder/TEncTop.cppWed Sep 18 16:13:33 2013
 -0500
 +++ b/source/Lib/TLibEncoder/TEncTop.cppThu Sep 19 13:31:22 2013
 +0530
 @@ -114,7 +114,7 @@
  for (int i = 0; i  param.frameNumThreads; i++)
  {
  // Ensure frame encoder is idle before destroying it
 -AccessUnit tmp;
 +NALUnitEBSP **tmp = NULL;
  m_frameEncoder[i].getEncodedPicture(tmp);
  m_frameEncoder[i].destroy();
  }
 @@ -155,19 +155,19 @@
  }
  }

 -int TEncTop::getStreamHeaders(AccessUnit accessUnit)
 +int TEncTop::getStreamHeaders(NALUnitEBSP **nalunits)
  {
 -return m_frameEncoder-getStreamHeaders(accessUnit);
 +return m_frameEncoder-getStreamHeaders(nalunits);
  }

  /**
   \param   flush   force encoder to encode a frame
   \param   pic_in  input original YUV picture or NULL
   \param   pic_out pointer to reconstructed picture struct
 - \param   accessUnitsOut  output bitstream
 + \param   nalunitsoutput bitstream
   \retval  number of encoded pictures
   */
 -int TEncTop::encode(bool flush, const x265_picture_t* pic_in,
 x265_picture_t *pic_out, AccessUnit accessUnitOut)
 +int TEncTop::encode(bool flush, const x265_picture_t* pic_in,
 x265_picture_t *pic_out, NALUnitEBSP **nalunits)
  {
  if (pic_in)
  {
 @@ -207,7 +207,7 @@
  // getEncodedPicture() should block until the FrameEncoder has
 completed
  // encoding the frame.  This is how back-pressure through the API is
  // accomplished when the encoder is full.
 -TComPic *out = curEncoder-getEncodedPicture(accessUnitOut);
 +TComPic *out = curEncoder-getEncodedPicture(nalunits);

  if (!out  flush)
  {
 @@ -221,7 +221,7 @@
  {
  curEncoder = m_frameEncoder[m_curEncoder];
  m_curEncoder = (m_curEncoder + 1) % param.frameNumThreads;
 -out = curEncoder-getEncodedPicture(accessUnitOut);
 +out = curEncoder-getEncodedPicture(nalunits);
  }
  while (!out  flushed != m_curEncoder);
  }
 @@ -253,7 +253,7 @@
  pic_out-stride[2] = recpic-getCStride();
  }

 -double bits = calculateHashAndPSNR(out, accessUnitOut);
 +double bits = calculateHashAndPSNR(out, nalunits);
  // Allow this frame to be recycled if no frame encoders are using
 it for reference
  ATOMIC_DEC(out-m_countRefEncoders);

 @@ -481,7 +481,7 @@

  /* Returns Number of bits in current encoded pic */

 -double TEncTop::calculateHashAndPSNR(TComPic* pic, AccessUnit accessUnit)
 +double TEncTop::calculateHashAndPSNR(TComPic* pic, NALUnitEBSP **nalunits)
  {
  TComPicYuv* recon = pic-getPicYuvRec();
  TComPicYuv* orig  = pic-getPicYuvOrg();
 @@ -537,8 +537,12 @@
  OutputNALUnit onalu(NAL_UNIT_SUFFIX_SEI, 0);
  m_frameEncoder-m_seiWriter.writeSEImessage(onalu.m_Bitstream,
 sei_recon_picture_digest, pic-getSlice()-getSPS());
  writeRBSPTrailingBits(onalu.m_Bitstream);
 -
 -accessUnit.insert(accessUnit.end(), new NALUnitEBSP(onalu));
 +
 +int count = 0;
 +while(nalunits[count] != NULL)
 +count++;
 +nalunits[count] = (NALUnitEBSP *)X265_MALLOC(NALUnitEBSP, 1);
 +nalunits[count]-init(onalu);
  }

  /* calculate the size of the access unit, excluding:
 @@ -546,13 

Re: [x265] [PATCH] lookahead: fix crash for I frame cost estimation

2013-09-19 Thread Deepthi Nandakumar
Can you try sending this as a fresh patch to the mailing list? It does not
apply cleanly to the parent node.


On Thu, Sep 19, 2013 at 2:46 PM, Deepthi Devaki Akkoorath 
deepthidev...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Deepthi Devaki deepthidev...@multicorewareinc.com
 # Date 1379582068 -19800
 # Node ID d52de033d7dde00255e9d55ece138c33fd61
 # Parent  26d6f155f8df69147f40f4945d99c29a52988c56
 lookahead: fix crash for I frame cost estimation

 diff -r 26d6f155f8df -r d52de033d7dd source/encoder/slicetype.cpp
 --- a/source/encoder/slicetype.cpp Wed Sep 18 16:13:33 2013 -0500
 +++ b/source/encoder/slicetype.cpp Thu Sep 19 14:44:28 2013 +0530
 @@ -360,44 +360,46 @@
  mvmax.x = (uint16_t)((widthInCU - cux - 1) * cuSize + 8);
  mvmax.y = (uint16_t)((heightInCU - cuy - 1) * cuSize + 8);

 -for (int i = 0; i  1 + bBidir; i++)
 +if (p0 != p1)
  {
 -if (!bDoSearch[i])
 +for (int i = 0; i  1 + bBidir; i++)
  {
 -/* Use previously calculated cost */
 +if (!bDoSearch[i])
 +{
 +/* Use previously calculated cost */
 +COPY2_IF_LT(bcost, *fenc_costs[i], listused, i + 1);
 +continue;
 +}
 +int numc = 0;
 +MV mvc[4], mvp;
 +MV *fenc_mv = fenc_mvs[i];
 +
 +/* Reverse-order MV prediction. */
 +mvc[0] = 0;
 +mvc[2] = 0;
 +#define MVC(mv) mvc[numc++] = mv;
 +if (cux  widthInCU - 1)
 +MVC(fenc_mv[1]);
 +if (cuy  heightInCU - 1)
 +{
 +MVC(fenc_mv[widthInCU]);
 +if (cux  0)
 +MVC(fenc_mv[widthInCU - 1]);
 +if (cux  widthInCU - 1)
 +MVC(fenc_mv[widthInCU + 1]);
 +}
 +#undef MVC
 +if (numc = 1)
 +mvp = mvc[0];
 +else
 +{
 +x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]);
 +}
 +
 +*fenc_costs[i] = me.motionEstimate(i ? fref1 : fref0, mvmin,
 mvmax, mvp, numc, mvc, merange, *fenc_mvs[i]);
  COPY2_IF_LT(bcost, *fenc_costs[i], listused, i + 1);
 -continue;
  }
 -int numc = 0;
 -MV mvc[4], mvp;
 -MV *fenc_mv = fenc_mvs[i];
 -
 -/* Reverse-order MV prediction. */
 -mvc[0] = 0;
 -mvc[2] = 0;
 -#define MVC(mv) mvc[numc++] = mv;
 -if (cux  widthInCU - 1)
 -MVC(fenc_mv[1]);
 -if (cuy  heightInCU - 1)
 -{
 -MVC(fenc_mv[widthInCU]);
 -if (cux  0)
 -MVC(fenc_mv[widthInCU - 1]);
 -if (cux  widthInCU - 1)
 -MVC(fenc_mv[widthInCU + 1]);
 -}
 -#undef MVC
 -if (numc = 1)
 -mvp = mvc[0];
 -else
 -{
 -x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]);
 -}
 -
 -*fenc_costs[i] = me.motionEstimate(i ? fref1 : fref0, mvmin,
 mvmax, mvp, numc, mvc, merange, *fenc_mvs[i]);
 -COPY2_IF_LT(bcost, *fenc_costs[i], listused, i + 1);
  }
 -
  if (!fenc-bIntraCalculated)
  {
  int nLog2SizeMinus2 = g_convertToBit[cuSize]; // partition size


 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] Commit c56e392b2c68 fails to link with MinGW

2013-09-24 Thread Deepthi Nandakumar
Thanks, pushed fix.


On Tue, Sep 24, 2013 at 2:14 PM, Nikos Barkas nikbar2...@yahoo.com wrote:

 There is a linking problem with revision c56e392b2c68 on MinGW. The error
 comes from common.cpp and is our old friend:

 undefined reference to `__sync_val_compare_and_swap_4'

 The file common.cpp must be added to the list of files for which
 -march=i686 is applied in source\common\CMakeLists.txt.

 Best regards
 Nikos

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] Fixed the --me 4 option error for Full search

2013-10-15 Thread Deepthi Nandakumar
# HG changeset patch
# User sai...@multicorewareinc.com
# Date 1381818060 -19800
#  Tue Oct 15 11:51:00 2013 +0530
# Branch hsa
# Node ID 5725e4986fa142c38ed7416f303f93604135c603
# Parent  0004c182f326197019a7426f080dd04ff99f39d3
Fixed the --me 4 cli option error

diff -r 0004c182f326 -r 5725e4986fa1 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Thu Oct 10 19:12:06 2013 -0500
+++ b/source/encoder/motion.cpp Tue Oct 15 11:51:00 2013 +0530
@@ -732,8 +732,9 @@
 break;
 }
 }
+break;
 }
-break;
+
 case X265_FULL_SEARCH:
 {
 // dead slow exhaustive search, but at least it uses sad_x4()
@@ -767,8 +768,9 @@
 COST_MV(tmv.x, tmv.y);
 }
 }
+break;
 }
-
+
 default:
 assert(0);
 break;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Fixed issue with chroma 2xN block asm code

2013-10-17 Thread Deepthi Nandakumar
Pushed.


On Thu, Oct 17, 2013 at 12:52 PM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1381994531 -19800
 # Node ID 68f97c7f08392d387046736ae3e86095c653fd05
 # Parent  5914800260d95a9bac7ce3eec2291e5c781e8422
 Fixed issue with chroma 2xN block asm code

 diff -r 5914800260d9 -r 68f97c7f0839 source/common/x86/ipfilter8.asm
 --- a/source/common/x86/ipfilter8.asm   Wed Oct 16 21:10:13 2013 +0530
 +++ b/source/common/x86/ipfilter8.asm   Thu Oct 17 12:52:11 2013 +0530
 @@ -54,7 +54,7 @@
  pmulhrsw%2, %3
  packuswb%2, %2
  pextrw  [dstq], %2, 0
 -pextrw  [dstq + dststrideq], %2, 1
 +pextrw  [dstq + dststrideq], %2, 2
  %endmacro


  
 ;-
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] lowres : removed duplicate code for extending right and bottom margin

2013-10-21 Thread Deepthi Nandakumar
Thanks - unintended duplicate copy. Pushed the same fix before I saw this.


On Mon, Oct 21, 2013 at 1:16 PM, Gopu Govindaswamy 
g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1382341604 -19800
 # Node ID ff8860e0b3082cbb1848e0f5c89e73dc1d7aa87e
 # Parent  fabb25ae4db4a06073decead3836521a14b5bde9
 lowres : removed duplicate code for extending right and bottom margin

 diff -r fabb25ae4db4 -r ff8860e0b308 source/common/lowres.cpp
 --- a/source/common/lowres.cpp  Sat Oct 19 18:08:07 2013 +0800
 +++ b/source/common/lowres.cpp  Mon Oct 21 13:16:44 2013 +0530
 @@ -159,24 +159,6 @@
  ::memcpy(src + y * srcStride, src, sizeof(Pel) * (extWidth));
  }

 -/* extending right margin*/
 -if (2 * width  orig-getWidth())
 -{
 -for (y = 0; y  srcHeight; y++)
 -{
 -::memset(src + srcWidth, src[srcWidth - 1], sizeof(Pel) *
 (X265_LOWRES_CU_SIZE - 1));
 -src += srcStride;
 -}
 -}
 -
 -/* extending bottom margin */
 -src = orig-getLumaAddr() + (srcHeight - 1) * srcStride;
 -
 -for (y = 1; y = 2 * lines - srcHeight; y++)
 -{
 -::memcpy(src + y * srcStride, src, sizeof(Pel) * (extWidth));
 -}
 -
  /* downscale and generate 4 HPEL planes for lookahead */
  primitives.frame_init_lowres_core(orig-getLumaAddr(),
lowresPlane[0], lowresPlane[1],
 lowresPlane[2], lowresPlane[3],
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] ratecontrol: initialize frameThreads

2013-10-21 Thread Deepthi Nandakumar
Yes, hg import worked fine. You probably need to configure your username in
git settings to firstname lastname. Does this help?
https://help.github.com/articles/setting-your-username-in-git


On Mon, Oct 21, 2013 at 10:52 PM, Rafaël Carré fun...@videolan.org wrote:


 https://bitbucket.org/multicoreware/x265/commits/3fe9a9d0a0b6279643c9272805f4d9b46c2cdcd9

 =?utf-8?b?UmFmYcOrbCBDYXJyw6kgPGZ1bm1hbkB2aWRlb2xhbi5vcmc+?= committed

 Indeed :)

 I am sending these from my git hg clone with git send-email.

 Any idea how to make these patches more smooth from hg point of view?


 http://stackoverflow.com/questions/2626898/mercurial-copy-patch-to-repository
 seems to say hg import should work.

 How did you apply this one?

 Thanks,

 Le 21/10/2013 18:47, Deepthi Nandakumar a écrit :
  Pushed this - but the user name has not been configured properly.
 
 
  On Mon, Oct 21, 2013 at 6:16 PM, Rafaël Carré fun...@videolan.org
 wrote:
 
  Fix a floating point exception
  ---
   source/encoder/ratecontrol.cpp | 1 +
   1 file changed, 1 insertion(+)
 
  diff --git a/source/encoder/ratecontrol.cpp
  b/source/encoder/ratecontrol.cpp
  index b588bde..9085ec3 100644
  --- a/source/encoder/ratecontrol.cpp
  +++ b/source/encoder/ratecontrol.cpp
  @@ -129,6 +129,7 @@ void RateControl::calcAdaptiveQuantFrame(TComPic
 *pic)
   RateControl::RateControl(TEncCfg * _cfg)
   {
   this-cfg = _cfg;
  +frameThreads = cfg-param.frameNumThreads;
   bitrate = cfg-param.rc.bitrate * 1000;
   frameDuration = 1.0 / cfg-param.frameRate;
   ncu = (int)((cfg-param.sourceHeight * cfg-param.sourceWidth) /
  pow((int)cfg-param.maxCUSize, 2.0));
  --
  1.8.3.2
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] no-rdo: use bit estimates from ME to calculate RDcost

2013-10-30 Thread Deepthi Nandakumar
Steve,

This is part of an ongoing change to rd 0/1, where we want to replace the
cost with distortion + lambda * (coeff bits + mv bits), as opposed to the
cost derived from the RDO process. Here, the coeff bits have not been added
yet; only the ME bits are considered.

I believe we'll need an exhaustive set of tests for computer-generated
video like sintel/bigbuckbunny, and at a later stage, we could add in
presets for those (like x264 does).


On Thu, Oct 31, 2013 at 1:23 AM, Steve Borho st...@borho.org wrote:




 On Wed, Oct 30, 2013 at 4:47 AM, deepthidev...@multicorewareinc.comwrote:

 # HG changeset patch
 # User Deepthi Devaki deepthidev...@multicorewareinc.com
 # Date 1383126419 -19800
 # Node ID 77db80a67f4e55f22bc02ed02930a269bfac6b50
 # Parent  74bf8634037ce3e673b21738a5ffaf1c14381414
 no-rdo: use bit estimates from ME to calculate RDcost.

 bits estimated in ME stored in CU and used for calculating rdcost along
 with distortion. This results in better bitrate with no-rdo, with small
 drop in PSNR.


 I see this has been already pushed, but I'm not certain this is an
 unambiguously good trade-off:

 x265 sintel_trailer_2k_480p24.y4m out.hevc --rd 0  --b-adapt 2 -b3 --hash 1

 before:
 encoded 1253 frames in 262.32s (4.78 fps), 143.50 kb/s, Global PSNR: 48.745

 after:
 encoded 1253 frames in 259.50s (4.83 fps), 142.36 kb/s, Global PSNR: 48.655



 diff -r 74bf8634037c -r 77db80a67f4e source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Oct 30 13:44:16 2013
 +0530
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Wed Oct 30 15:16:59 2013
 +0530
 @@ -2115,7 +2115,7 @@
   * \param bValid
   * \returns void
   */
 -void TEncSearch::xMergeEstimation(TComDataCU* cu, int puIdx, uint32_t
 interDir, TComMvField* mvField, uint32_t mergeIndex, uint32_t outCost,
 TComMvField* mvFieldNeighbours, UChar* interDirNeighbours, int
 numValidMergeCand)
 +void TEncSearch::xMergeEstimation(TComDataCU* cu, int puIdx, uint32_t
 interDir, TComMvField* mvField, uint32_t mergeIndex, uint32_t outCost,
 uint32_t outbits, TComMvField* mvFieldNeighbours, UChar*
 interDirNeighbours, int numValidMergeCand)
  {
  uint32_t absPartIdx = 0;
  int width = 0;
 @@ -2144,7 +2144,7 @@
  {
  uint32_t costCand = MAX_UINT;
  uint32_t bitsCand = 0;
 -
 +
  cu-getCUMvField(REF_PIC_LIST_0)-m_mv[absPartIdx] =
 mvFieldNeighbours[0 + 2 * mergeCand].mv;
  cu-getCUMvField(REF_PIC_LIST_0)-m_refIdx[absPartIdx] =
 mvFieldNeighbours[0 + 2 * mergeCand].refIdx;
  cu-getCUMvField(REF_PIC_LIST_1)-m_mv[absPartIdx] =
 mvFieldNeighbours[1 + 2 * mergeCand].mv;
 @@ -2160,6 +2160,7 @@
  if (costCand  outCost)
  {
  outCost = costCand;
 +outbits = bitsCand;
  mvField[0] = mvFieldNeighbours[0 + 2 * mergeCand];
  mvField[1] = mvFieldNeighbours[1 + 2 * mergeCand];
  interDir = interDirNeighbours[mergeCand];
 @@ -2226,6 +2227,8 @@
  UChar interDirNeighbours[MRG_MAX_NUM_CANDS];
  int numValidMergeCand = 0;

 +int totalmebits = 0;
 +
  for (int partIdx = 0; partIdx  numPart; partIdx++)
  {
  uint32_t listCost[2] = { MAX_UINT, MAX_UINT };
 @@ -2495,7 +2498,8 @@

  // find Merge result
  uint32_t mrgCost = MAX_UINT;
 -xMergeEstimation(cu, partIdx, mrgInterDir, mrgMvField,
 mrgIndex, mrgCost, mvFieldNeighbours, interDirNeighbours,
 numValidMergeCand);
 +uint32_t mrgBits = 0;
 +xMergeEstimation(cu, partIdx, mrgInterDir, mrgMvField,
 mrgIndex, mrgCost, mrgBits, mvFieldNeighbours, interDirNeighbours,
 numValidMergeCand);
  if (mrgCost  meCost)
  {
  // set Merge result
 @@ -2517,6 +2521,7 @@
  #if CU_STAT_LOGFILE
  meCost += mrgCost;
  #endif
 +totalmebits += mrgBits;
  }
  else
  {
 @@ -2530,11 +2535,18 @@
  #if CU_STAT_LOGFILE
  meCost += meCost;
  #endif
 +totalmebits += mebits;
  }
  }
 +else
 +{
 +totalmebits += mebits;
 +}
  motionCompensation(cu, predYuv, REF_PIC_LIST_X, partIdx, bLuma,
 bChroma);
  }

 +cu-m_totalBits = totalmebits;
 +
  setWpScalingDistParam(cu, -1, REF_PIC_LIST_X);
  }

 diff -r 74bf8634037c -r 77db80a67f4e source/Lib/TLibEncoder/TEncSearch.h
 --- a/source/Lib/TLibEncoder/TEncSearch.h   Wed Oct 30 13:44:16 2013
 +0530
 +++ b/source/Lib/TLibEncoder/TEncSearch.h   Wed Oct 30 15:16:59 2013
 +0530
 @@ -211,7 +211,7 @@
  void xGetBlkBits(PartSize cuMode, bool bPSlice, int partIdx,
 uint32_t lastMode, uint32_t blockBit[3]);

  void xMergeEstimation(TComDataCU* cu, int partIdx, uint32_t
 uiInterDir,
 -  TComMvField* pacMvField, uint32_t mergeIndex,
 uint32_t outCost,
 +  TComMvField* pacMvField, uint32_t mergeIndex,
 uint32_t outCost, uint32_t outbits,

Re: [x265] [PATCH] no-rdo: Use entropy encoder for bit estimation

2013-10-31 Thread Deepthi Nandakumar
Posting the efficiency results FYI. The performance drop will be taken care
of once the rd 0/1 refactor is complete.


Before BasketballPass_416x240_50   (11.04 fps), 328.96 kb/s, Global
PSNR: 35.613
After BasketballPass_416x240_50(10.70 fps), 326.64 kb/s, Global
PSNR: 35.609

Before big_buck_bunny_360p24   (9.68 fps), 52.08 kb/s, Global PSNR:
43.549
After big_buck_bunny_360p24(9.12 fps), 51.80 kb/s, Global PSNR:
43.561

Before FourPeople_1280x720_60  (5.33 fps), 510.23 kb/s, Global
PSNR: 39.653
After FourPeople_1280x720_60   (5.11 fps), 505.69 kb/s, Global
PSNR: 39.654

Before sintel_trailer_2k_720p24(4.62 fps), 88.32 kb/s, Global PSNR:
54.869
After sintel_trailer_2k_720p24 (4.36 fps), 87.23 kb/s, Global PSNR:
54.802

Before Johnny_1280x720_60  (6.11 fps), 296.06 kb/s, Global
PSNR: 40.525
After Johnny_1280x720_60   (5.96 fps), 294.26 kb/s, Global
PSNR: 40.548

Before Kimono1_1920x1080_24(1.07 fps), 1811.05 kb/s, Global
PSNR: 38.624
After Kimono1_1920x1080_24 (1.02 fps), 1798.91 kb/s, Global
PSNR: 38.625

Before BasketballDrive_1920x1080   (1.16 fps), 3849.92 kb/s, Global
PSNR: 37.146
After BasketballDrive_1920x1080(1.06 fps), 3820.42 kb/s, Global
PSNR: 37.150



On Thu, Oct 31, 2013 at 12:44 PM, deepthidev...@multicorewareinc.comwrote:

 # HG changeset patch
 # User Deepthi Devaki deepthidev...@multicorewareinc.com
 # Date 1383203307 -19800
 # Node ID 4b4332d038832ab8812773d618b38329ec75ae4b
 # Parent  ec6b4d35f11053b06d0e1ea46df798ff89a4c127
 no-rdo: Use entropy encoder for bit estimation.

 Instead of me-bit estimation, use entropy encoder.

 diff -r ec6b4d35f110 -r 4b4332d03883 source/Lib/TLibEncoder/TEncSearch.h
 --- a/source/Lib/TLibEncoder/TEncSearch.h   Thu Oct 31 00:09:49 2013
 -0500
 +++ b/source/Lib/TLibEncoder/TEncSearch.h   Thu Oct 31 12:38:27 2013
 +0530
 @@ -165,6 +165,12 @@

  void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t
 absPartIdx, bool bLumaOnly, TComYuv* reconYuv);

 +//
 ---
 +// compute symbol bits
 +//
 ---
 +
 +uint32_t xSymbolBitsInter(TComDataCU* cu);
 +
  protected:

  //
 
 @@ -232,12 +238,6 @@
   UInt64 rdCost, uint32_t outBits, uint32_t
 outDist, uint32_t *puiZeroDist);
  void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, uint32_t
 absTUPartIdx, TShortYUV* resiYuv, uint32_t depth, bool bSpatial);

 -//
 ---
 -// compute symbol bits
 -//
 ---
 -
 -uint32_t xSymbolBitsInter(TComDataCU* cu);
 -
  void setWpScalingDistParam(TComDataCU* cu, int refIdx, int picList);
  };
  }
 diff -r ec6b4d35f110 -r 4b4332d03883 source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Thu Oct 31 00:09:49 2013 -0500
 +++ b/source/encoder/compress.cpp   Thu Oct 31 12:38:27 2013 +0530
 @@ -228,6 +228,9 @@
  int part = partitionFromSizes(outTempCU-getWidth(0),
 outTempCU-getHeight(0));
  uint32_t distortion =
 primitives.sse_pp[part](m_origYuv[depth]-getLumaAddr(),
 m_origYuv[depth]-getStride(),

  outPredYuv-getLumaAddr(), outPredYuv-getStride());
 +
  
 m_rdGoOnSbacCoder-load(m_rdSbacCoders[outTempCU-getDepth(0)][CI_CURR_BEST]);
 +outTempCU-m_totalBits = m_search-xSymbolBitsInter(outTempCU);
 +
  outTempCU-m_totalCost = m_rdCost-calcRdCost(distortion,
 outTempCU-m_totalBits);
  }

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH RFC] presets: adjust presets to increase spread and align closer with x264 presets

2013-11-07 Thread Deepthi Nandakumar
Since the default preset is medium, shouldn't x265_param_default apply
medium settings only, so that the if (!strcmp(preset, "medium")) block here
is empty?
In that case, the preset (if specified) would change only the params which
are different from medium.

Also, there are a few params here that
1. aren't used and aren't initialised properly (e.g., constrainedIntra,
weightedBipred) - let's remove them and add them back when the feature
exists.
2. are dependent on other param fields, e.g. EnableRdo and EnabledRDOQ
depend on RDLevel. This particular case is handled safely inside
encoder::configure, but I'm not sure about all such cases.

Just thought this is a good time for spring cleaning.
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit

2013-11-07 Thread Deepthi Nandakumar
I have a few questions.

1. Do we need so many local variables?

2. Why are we adding outTempCU->cost to totalCost and then comparing
against outBestCU->cost? That doesn't make much sense to me. AFAIK,
outTempCU does not contain any valid data - we should remove this.

3. Should we be adding costCUColocated0 and costCUColocated1 also? Adding
up spatial and temporal costs, and then comparing against a threshold
derived from spatial costs - umm, no. Let's leave these out.

4. The rest of it looks ok, logically. But now you may need to re-tune this
with different weights.

Best,
Deepthi









On Thu, Nov 7, 2013 at 4:59 PM, sumala...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Sumalatha Polureddy
 # Date 1383823751 -19800
 # Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075
 # Parent  0a1b379be359cbcf76140ac392104c856a037c78
 no-rdo: giving weightage to the cost of all CU's and neighbour CU's for
 early exit

 Early exit is done when CU cost at depth n is lessthan sum of 60% of
 avgcost of all CU's
 and 40% of avgcost of neighbour CU's at same depth.

 diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp
 --- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530
 @@ -69,6 +69,14 @@
  m_ssimCnt = 0;
  m_frameTime = 0.0;
  m_elapsedCompressTime = 0.0;
 +m_avgCost[0] = 0;
 +m_avgCost[1] = 0;
 +m_avgCost[2] = 0;
 +m_avgCost[3] = 0;
 +m_count[0] = 0;
 +m_count[1] = 0;
 +m_count[2] = 0;
 +m_count[3] = 0;
  }

  TComPic::~TComPic()
 diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h
 --- a/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/Lib/TLibCommon/TComPic.h   Thu Nov 07 16:59:11 2013 +0530
 @@ -95,6 +95,8 @@
  MD5Contextm_state[3];
  uint32_t  m_crc[3];
  uint32_t  m_checksum[3];
 +UInt64m_avgCost[4];
 +uint32_t  m_count[4];

  /* SSIM values per frame */
  doublem_ssim;
 diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Thu Nov 07 18:17:52 2013 +0800
 +++ b/source/encoder/compress.cpp   Thu Nov 07 16:59:11 2013 +0530
 @@ -567,13 +567,14 @@
  if (bSubBranch  bTrySplitDQP  depth  g_maxCUDepth - g_addCUDepth)
  {
  #if EARLY_EXIT // turn ON this to enable early exit
 -// early exit when the RD cost of best mode at depth n is less
 than the avgerage of RD cost of the
 -// CU's(above, aboveleft, aboveright, left, colocated) at depth
 n of previosuly coded CU's
 +// early exit when the RD cost of best mode at depth n is less
 than the sum of avgerage of RD cost of the neighbour
 +// CU's(above, aboveleft, aboveright, left, colocated) and all
 CU's at depth n  with weightage for each quantity
  if (outBestCU != 0)
  {
 -UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0,
 costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0,
 costCUColocated1 = 0, totalCost = 0, avgCost = 0;
 +UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0,
 costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0,
 costCUColocated1 = 0, totalCostNeigh = 0, totalCostAll = 0;
 +double avgCost = 0;
  UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0,
 countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0,
 countCUColocated1 = 0;
 -UInt64 totalCount = 0;
 +UInt64 totalCountNeigh = 0, totalCountAll = 0;
  TComDataCU* above = outTempCU-getCUAbove();
  TComDataCU* aboveLeft = outTempCU-getCUAboveLeft();
  TComDataCU* aboveRight = outTempCU-getCUAboveRight();
 @@ -614,10 +615,15 @@
  countCUColocated1 = colocated1-m_count[depth];
  }

 -totalCost = costCU + costCUAbove + costCUAboveLeft +
 costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
 -totalCount = countCU + countCUAbove + countCUAboveLeft +
 countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
 -if (totalCount != 0)
 -avgCost = totalCost / totalCount;
 +totalCostNeigh = costCU + costCUAbove + costCUAboveLeft +
 costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
 +totalCountNeigh = countCU + countCUAbove + countCUAboveLeft +
 countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
 +
 +totalCostAll = (outTempCU-getPic()-m_avgCost[depth] *
 outTempCU-getPic()-m_count[depth]) - totalCostNeigh;
 +totalCountAll = outTempCU-getPic()-m_count[depth] -
 totalCountNeigh;
 +
 +//giving 60% weight to all CU's and 40% weight to neighbour
 CU's
 +if (totalCountAll)
 +avgCost = ((0.6 * totalCostAll) + (0.4 * 

Re: [x265] [PATCH] TShortYUV.cpp, added code to use new pixelsub_ps asm primitives

2013-11-13 Thread Deepthi Nandakumar
This patch cannot be accepted.

1. For generic 8bpp I420 optimizations, we can avoid the extra call to
partitionFromSizes in chroma by passing in part, instead of partsize. This
will remove the downshift and then upscale by 2 (!!)

2. We will need to handle multiple color spaces separately as luma and
chroma tables are now aligned only for I420.


On Wed, Nov 13, 2013 at 12:31 PM, muru...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Murugan Vairavel muru...@multicorewareinc.com
 # Date 1384326072 -19800
 #  Wed Nov 13 12:31:12 2013 +0530
 # Node ID 69d4c1cfc8bed7c63bfdaa1073196e0874d14ebe
 # Parent  c4ca80d19105ccf1ba2ec14dd65915f2820a660d
 TShortYUV.cpp, added code to use new pixelsub_ps asm primitives

 diff -r c4ca80d19105 -r 69d4c1cfc8be source/common/TShortYUV.cpp
 --- a/source/common/TShortYUV.cpp   Tue Nov 12 19:10:23 2013 +0530
 +++ b/source/common/TShortYUV.cpp   Wed Nov 13 12:31:12 2013 +0530
 @@ -95,7 +95,8 @@
  int src1Stride = srcYuv1-getStride();
  int dstStride  = m_width;

 -primitives.pixelsub_ps(x, y, dst, dstStride, src0, src1, src0Stride,
 src1Stride);
 +int part = partitionFromSizes(x, y);
 +primitives.luma_sub_ps[part](dst, dstStride, src0, src1, src0Stride,
 src1Stride);
  }

  void TShortYUV::subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1,
 unsigned int trUnitIdx, unsigned int partSize)
 @@ -113,8 +114,9 @@
  int src1Stride = srcYuv1-getCStride();
  int dstStride  = m_cwidth;

 -primitives.pixelsub_ps(x, y, dstU, dstStride, srcU0, srcU1,
 src0Stride, src1Stride);
 -primitives.pixelsub_ps(x, y, dstV, dstStride, srcV0, srcV1,
 src0Stride, src1Stride);
 +int part = partitionFromSizes(2 * x, 2 * y);
 +primitives.chroma_sub_ps[part](dstU, dstStride, srcU0, srcU1,
 src0Stride, src1Stride);
 +primitives.chroma_sub_ps[part](dstV, dstStride, srcV0, srcV1,
 src0Stride, src1Stride);
  }

  void TShortYUV::addClip(TShortYUV* srcYuv0, TShortYUV* srcYuv1, unsigned
 int trUnitIdx, unsigned int partSize)
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] pixelsub_ps, Removed unused old code

2013-11-13 Thread Deepthi Nandakumar
On hold until earlier patch has been fixed.


On Wed, Nov 13, 2013 at 12:46 PM, muru...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Murugan Vairavel muru...@multicorewareinc.com
 # Date 1384326967 -19800
 #  Wed Nov 13 12:46:07 2013 +0530
 # Node ID 481cdfc251de0f99ef0a3c4fd53c786b79b5f182
 # Parent  69d4c1cfc8bed7c63bfdaa1073196e0874d14ebe
 pixelsub_ps, Removed unused old code

 diff -r 69d4c1cfc8be -r 481cdfc251de source/common/pixel.cpp
 --- a/source/common/pixel.cpp   Wed Nov 13 12:31:12 2013 +0530
 +++ b/source/common/pixel.cpp   Wed Nov 13 12:46:07 2013 +0530
 @@ -971,7 +971,6 @@
  p.weightpUniPixel = weightUnidirPix;
  p.weightpUni = weightUnidir;

 -p.pixelsub_ps = pixelsub_ps_c;
  p.pixeladd_ss = pixeladd_ss_c;

  p.scale1D_128to64 = scale1D_128to64;
 diff -r 69d4c1cfc8be -r 481cdfc251de source/common/primitives.h
 --- a/source/common/primitives.hWed Nov 13 12:31:12 2013 +0530
 +++ b/source/common/primitives.hWed Nov 13 12:46:07 2013 +0530
 @@ -162,7 +162,6 @@
  typedef void (*blockcpy_sp_t)(int bx, int by, int16_t *dst, intptr_t
 dstride, pixel *src, intptr_t sstride); // dst is aligned
  typedef void (*blockcpy_ps_t)(int bx, int by, pixel *dst, intptr_t
 dstride, int16_t *src, intptr_t sstride); // dst is aligned
  typedef void (*blockcpy_sc_t)(int bx, int by, int16_t *dst, intptr_t
 dstride, uint8_t *src, intptr_t sstride); // dst is aligned
 -typedef void (*pixelsub_ps_t)(int bx, int by, int16_t *dst, intptr_t
 dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
  typedef void (*pixeladd_ss_t)(int bx, int by, int16_t *dst, intptr_t
 dstride, int16_t *src0, int16_t *src1, intptr_t sstride0, intptr_t
 sstride1);
  typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0,
 intptr_t sstride0, pixel *src1, intptr_t sstride1, int weight);
  typedef void (*blockfill_s_t)(int16_t *dst, intptr_t dstride, int16_t
 val);
 @@ -277,7 +276,6 @@

  weightpUni_tweightpUni;
  weightpUniPixel_t weightpUniPixel;
 -pixelsub_ps_t   pixelsub_ps;
  pixeladd_ss_t   pixeladd_ss;
  pixelavg_pp_t   pixelavg_pp[NUM_LUMA_PARTITIONS];

 diff -r 69d4c1cfc8be -r 481cdfc251de source/common/vec/blockcopy-sse3.cpp
 --- a/source/common/vec/blockcopy-sse3.cpp  Wed Nov 13 12:31:12 2013
 +0530
 +++ b/source/common/vec/blockcopy-sse3.cpp  Wed Nov 13 12:46:07 2013
 +0530
 @@ -170,55 +170,6 @@
  }
  }

 -void pixelsub_ps(int bx, int by, int16_t *dst, intptr_t dstride, uint8_t
 *src0, uint8_t *src1, intptr_t sstride0, intptr_t sstride1)
 -{
 -size_t aligncheck = (size_t)dst | (size_t)src0 | bx | sstride0 |
 sstride1 | dstride;
 -
 -if (!(aligncheck  15))
 -{
 -// fast path, multiples of 16 pixel wide blocks
 -for (int y = 0; y  by; y++)
 -{
 -for (int x = 0; x  bx; x += 16)
 -{
 -__m128i word0, word1;
 -__m128i word3, word4;
 -__m128i mask = _mm_setzero_si128();
 -
 -word0 = _mm_load_si128((__m128i const*)(src0 + x));//
 load 16 bytes from src1
 -word1 = _mm_load_si128((__m128i const*)(src1 + x));//
 load 16 bytes from src2
 -
 -word3 = _mm_unpacklo_epi8(word0, mask);// interleave
 with zero extensions
 -word4 = _mm_unpacklo_epi8(word1, mask);
 -_mm_store_si128((__m128i*)dst[x], _mm_subs_epi16(word3,
 word4));// store block into dst
 -
 -word3 = _mm_unpackhi_epi8(word0, mask);// interleave
 with zero extensions
 -word4 = _mm_unpackhi_epi8(word1, mask);
 -_mm_store_si128((__m128i*)dst[x + 8],
 _mm_subs_epi16(word3, word4));// store block into dst
 -}
 -
 -src0 += sstride0;
 -src1 += sstride1;
 -dst += dstride;
 -}
 -}
 -else
 -{
 -// slow path, irregular memory alignments or sizes
 -for (int y = 0; y  by; y++)
 -{
 -for (int x = 0; x  bx; x++)
 -{
 -dst[x] = (int16_t)(src0[x] - src1[x]);
 -}
 -
 -src0 += sstride0;
 -src1 += sstride1;
 -dst += dstride;
 -}
 -}
 -}
 -
  void pixeladd_ss(int bx, int by, int16_t *dst, intptr_t dstride, int16_t
 *src0, int16_t *src1, intptr_t sstride0, intptr_t sstride1)
  {
  size_t aligncheck = (size_t)dst | (size_t)src0 | sstride0 | sstride1
 | dstride;
 @@ -315,7 +266,6 @@
  p.blockcpy_pp = blockcopy_pp;
  p.blockcpy_ps = blockcopy_ps;
  p.blockcpy_sp = blockcopy_sp;
 -p.pixelsub_ps = pixelsub_ps;
  p.pixeladd_ss = pixeladd_ss;
  #endif // if HIGH_BIT_DEPTH
  }
 diff -r 69d4c1cfc8be -r 481cdfc251de source/test/pixelharness.cpp
 --- a/source/test/pixelharness.cpp  Wed Nov 13 12:31:12 2013 +0530
 +++ b/source/test/pixelharness.cpp  Wed Nov 13 12:46:07 2013 +0530
 @@ -358,29 +358,6 @@
  return true;
  

Re: [x265] [PATCH] TComYuv.cpp, blockcpy_pp asm integration

2013-11-14 Thread Deepthi Nandakumar
This can't be applied until the csp changes have been incorporated.


On Wed, Nov 13, 2013 at 4:23 PM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1384339140 -19800
 # Node ID c0da70471ba63f052bd0e0cdf81af3d0ca9150a4
 # Parent  c4ca80d19105ccf1ba2ec14dd65915f2820a660d
 TComYuv.cpp, blockcpy_pp asm integration

 diff -r c4ca80d19105 -r c0da70471ba6 source/Lib/TLibCommon/TComYuv.cpp
 --- a/source/Lib/TLibCommon/TComYuv.cpp Tue Nov 12 19:10:23 2013 +0530
 +++ b/source/Lib/TLibCommon/TComYuv.cpp Wed Nov 13 16:09:00 2013 +0530
 @@ -245,10 +245,12 @@

  void TComYuv::copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height, bool bLuma, bool bChroma)
  {
 +int part = partitionFromSizes(width, height);
 +
  if (bLuma)
 -copyPartToPartLuma(dstPicYuv, partIdx, width, height);
 +copyPartToPartLuma(dstPicYuv, partIdx, part);
  if (bChroma)
 -copyPartToPartChroma(dstPicYuv, partIdx, width  m_hChromaShift,
 height  m_vChromaShift);
 +copyPartToPartChroma(dstPicYuv, partIdx, part);
  }

  void TComYuv::copyPartToPartYuv(TShortYUV* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height, bool bLuma, bool bChroma)
 @@ -259,7 +261,7 @@
  copyPartToPartChroma(dstPicYuv, partIdx, width  m_hChromaShift,
 height  m_vChromaShift);
  }

 -void TComYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height)
 +void TComYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t part)
  {
  Pel* src = getLumaAddr(partIdx);
  Pel* dst = dstPicYuv-getLumaAddr(partIdx);
 @@ -269,7 +271,6 @@
  uint32_t srcstride = getStride();
  uint32_t dststride = dstPicYuv-getStride();

 -int part = partitionFromSizes(width, height);
  primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
  }

 @@ -285,7 +286,7 @@
  primitives.luma_copy_ps[part](dst, dststride, src, srcstride);
  }

 -void TComYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height)
 +void TComYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t part)
  {
  Pel* srcU = getCbAddr(partIdx);
  Pel* srcV = getCrAddr(partIdx);
 @@ -297,8 +298,8 @@
  uint32_t srcstride = getCStride();
  uint32_t dststride = dstPicYuv-getCStride();

 -primitives.blockcpy_pp(width, height, dstU, dststride, srcU,
 srcstride);
 -primitives.blockcpy_pp(width, height, dstV, dststride, srcV,
 srcstride);
 +primitives.chroma_copy_pp[part](dstU, dststride, srcU, srcstride);
 +primitives.chroma_copy_pp[part](dstV, dststride, srcV, srcstride);
  }

  void TComYuv::copyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t
 partIdx, uint32_t width, uint32_t height)
 diff -r c4ca80d19105 -r c0da70471ba6 source/Lib/TLibCommon/TComYuv.h
 --- a/source/Lib/TLibCommon/TComYuv.h   Tue Nov 12 19:10:23 2013 +0530
 +++ b/source/Lib/TLibCommon/TComYuv.h   Wed Nov 13 16:09:00 2013 +0530
 @@ -136,9 +136,9 @@
  //  Copy YUV partition buffer to other YUV partition buffer
  voidcopyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height, bool bLuma = true, bool bChroma = true);
  voidcopyPartToPartYuv(TShortYUV* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height, bool bLuma = true, bool bChroma = true);
 -voidcopyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height);
 +voidcopyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t part);
  voidcopyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height);
 -voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height);
 +voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t part);
  voidcopyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height);

  voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx,
 uint32_t width, uint32_t height, uint32_t chromaId);
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops

2013-11-18 Thread Deepthi Nandakumar
Pushed. But next time, please organize your patches more clearly.

1. Add C primitive, if it does not exist.
2. Add the function pointer declarations and new primitive declarations to
EncoderPrimitives struct.
3. Add testbench code for primitives.
4. Add asm code.

Once all above patches have been reviewed, pushed and tested on all
platforms, then you can integrate it with the actual encoder.




On Mon, Nov 18, 2013 at 3:23 PM, dnyanesh...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Dnyaneshwar G dnyanesh...@multicorewareinc.com
 # Date 1384768323 -19800
 #  Mon Nov 18 15:22:03 2013 +0530
 # Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1
 # Parent  ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0
 TComYuv::addAvg, primitive function for luma and chroma loops

 diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp
 --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530
 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530
 @@ -589,9 +589,7 @@

  void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t
 partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
  {
 -int x, y;
  uint32_t src0Stride, src1Stride, dststride;
 -int shiftNum, offset;

  int16_t* srcY0 = srcYuv0-getLumaAddr(partUnitIdx);
  int16_t* srcU0 = srcYuv0-getCbAddr(partUnitIdx);
 @@ -605,61 +603,24 @@
  Pel* dstU = getCbAddr(partUnitIdx);
  Pel* dstV = getCrAddr(partUnitIdx);

 +int part = partitionFromSizes(width, height);
 +
  if (bLuma)
  {
  src0Stride = srcYuv0-m_width;
  src1Stride = srcYuv1-m_width;
  dststride  = getStride();
 -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
 -offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;

 -for (y = 0; y  height; y++)
 -{
 -for (x = 0; x  width; x += 4)
 -{
 -dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] +
 offset)  shiftNum);
 -dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] +
 offset)  shiftNum);
 -dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] +
 offset)  shiftNum);
 -dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] +
 offset)  shiftNum);
 -}
 -
 -srcY0 += src0Stride;
 -srcY1 += src1Stride;
 -dstY  += dststride;
 -}
 +primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride,
 srcY1, src1Stride);
  }
  if (bChroma)
  {
 -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
 -offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 -
  src0Stride = srcYuv0-m_cwidth;
  src1Stride = srcYuv1-m_cwidth;
  dststride  = getCStride();

 -width  = m_hChromaShift;
 -height = m_vChromaShift;
 -
 -for (y = height - 1; y = 0; y--)
 -{
 -for (x = width - 1; x = 0; )
 -{
 -// note: chroma min width is 2
 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) 
 shiftNum);
 -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) 
 shiftNum);
 -x--;
 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) 
 shiftNum);
 -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) 
 shiftNum);
 -x--;
 -}
 -
 -srcU0 += src0Stride;
 -srcU1 += src1Stride;
 -srcV0 += src0Stride;
 -srcV1 += src1Stride;
 -dstU  += dststride;
 -dstV  += dststride;
 -}
 +primitives.chroma_addAvg[part](dstU, dststride, srcU0,
 src0Stride, srcU1, src1Stride);
 +primitives.chroma_addAvg[part](dstV, dststride, srcV0,
 src0Stride, srcV1, src1Stride);
  }
  }

 diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp
 --- a/source/common/pixel.cpp   Mon Nov 18 12:26:44 2013 +0530
 +++ b/source/common/pixel.cpp   Mon Nov 18 15:22:03 2013 +0530
 @@ -794,6 +794,27 @@
  a += dstride;
  }
  }
 +
 +templateint bx, int by
 +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t
 src0Stride, int16_t* src1, intptr_t src1Stride)
 +{
 +int shiftNum, offset;
 +shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
 +offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 +
 +for (int y = 0; y  by; y++)
 +{
 +for (int x = 0; x  bx; x += 2)
 +{
 +dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) 
 shiftNum);
 +dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) 
 shiftNum);
 +}
 +
 +src0 += src0Stride;
 +src1 += src1Stride;
 +dst  += dstStride;
 +}
 +}
  }  // end anonymous namespace

  namespace x265 {
 @@ -835,12 +856,14 @@
  p.satd[LUMA_16x64] = satd816, 64;

  #define CHROMA(W, H) \
 +p.chroma_addAvg[CHROMA_ ## W ## x ## H]  = addAvgW, H; \
  p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## 

Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

2013-11-18 Thread Deepthi Nandakumar
In encoder::configure, there should be a check for --b-adapt 0 --bframes
0; in that case, print a warning and disable b-pyramid.


On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy 
g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1384769433 -19800
 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969
 # Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
 b-pyramid implementation: Allow the use of B-frames as references for non
 B and B frames

 when we enable the b-pyramid the bitrates efficienctly reduced and there
 is not much diff in the performance
 and the PSNR 00. increased some of the clips and decreased some of clips

 Test results for reference when enable and disable the b-pyramid:
 cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2
 Enable B-reference  : --b-pyramid=1
 Disable B-reference : --b-pyramid=0

 Results:
 Enable / Disable

 clip - FourPeople_1280x720_60.yuv
 Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps)
 Bitrates - 516.30 kb/s / 544.68 kb/s
 PSNR - 39.725 / 39.701

 clip - BasketballDrive_1920x1080_50.y4m
 Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps)
 Bitrates -  4166.92 kb/s / 4370.43 kb/s
 PSNR -  37.261 / 37.268

 clip - Johnny_1280x720_60.y4m
 Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps)
 Bitrates - 304.29 kb/s / 328.84 kb/s
 PSNR - 40.605 / 40.551

 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps)
 Bitrates - 3496.84 kb/s / 3683.93 kb/s
 PSNR - 35.645 / 35.660

 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp
 --- a/source/common/common.cpp  Mon Nov 18 11:32:06 2013 +0530
 +++ b/source/common/common.cpp  Mon Nov 18 15:40:33 2013 +0530
 @@ -54,6 +54,7 @@

  static int parseCspName(const char *arg, int error);
  static int parseName(const char *arg, const char * const * names, int
 error);
 +static int parse_enum(const char *, const char * const * names, int *dst);

  using namespace x265;

 @@ -165,6 +166,7 @@
  param-bframes = 3;
  param-lookaheadDepth = 40;
  param-bFrameAdaptive = X265_B_ADAPT_FAST;
 +param-bpyramid = 0;
  param-scenecutThreshold = 40; /* Magic number pulled in from x264*/

  /* Intra Coding Tools */
 @@ -532,7 +534,7 @@
  }

  CHECK(param-bEnableWavefront  0, WaveFrontSynchro cannot be
 negative);
 -
 +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1);
  return check_failed;
  }

 @@ -620,6 +622,7 @@
  x265_log(param, X265_LOG_INFO, RDpenalty:
 %d\n, param-rdPenalty);
  }
  x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d /
 %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive);
 +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d /
 %d / %d\n, param-bpyramid, param-bEnableWeightedPred,
 param-maxNumReferences);
  x265_log(param, X265_LOG_INFO, tools: );
  #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR)
  TOOLOPT(param-bEnableRectInter, rect);
 @@ -628,7 +631,6 @@
  TOOLOPT(param-bEnableConstrainedIntra, cip);
  TOOLOPT(param-bEnableEarlySkip, esd);
  fprintf(stderr, rd=%d , param-rdLevel);
 -fprintf(stderr, ref=%d , param-maxNumReferences);

  TOOLOPT(param-bEnableLoopFilter, lft);
  if (param-bEnableSAO)
 @@ -650,7 +652,6 @@
  else
  fprintf(stderr, tskip );
  }
 -TOOLOPT(param-bEnableWeightedPred, weightp);
  TOOLOPT(param-bEnableWeightedBiPred, weightbp);
  TOOLOPT(param-rc.aqMode, aq);
  fprintf(stderr, \n);
 @@ -747,6 +748,15 @@
  }
  OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror);
  OPT(me)p-searchMethod = ::parseName(value,
 x265_motion_est_names, berror);
 +OPT(b-pyramid)
 +{
 +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid);
 +if (berror)
 +{
 +berror = 0;
 +p-bpyramid = atoi(value);
 +}
 +}
  else
  return X265_PARAM_BAD_NAME;
  #undef OPT
 @@ -802,6 +812,7 @@
  BOOL(p-bEnableSAO, sao);
  s += sprintf(s,  sao-lcu-bounds=%d, p-saoLcuBoundary);
  s += sprintf(s,  sao-lcu-opt=%d, p-saoLcuBasedOptimization);
 +s += sprintf(s,  b-pyramid=%d, p-bpyramid);
  #undef BOOL

  return buf;
 @@ -843,3 +854,13 @@
  error = 1;
  return a;
  }
 +static int parse_enum(const char *arg, const char * const * names, int
 *dst)
 +{
 +for (int i = 0; names[i]; i++)
 +if (!strcmp(arg, names[i]))
 +{
 +*dst = i;
 +return 0;
 +}
 +return -1;
 +}
 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h
 --- a/source/common/common.hMon Nov 18 11:32:06 2013 +0530
 +++ b/source/common/common.hMon Nov 18 15:40:33 2013 +0530
 @@ -107,6 +107,7 @@
  #define X265_LOG2(x)  log2(x)
  #endif

 +static const char * const x265_b_pyramid_names[] = {none, normal, 0};
  /* defined in common.cpp */
  int64_t 

Re: [x265] [PATCH] cleanup: removed unused code in pixel-a.asm

2013-11-25 Thread Deepthi Nandakumar
Does not apply at the tip.


On Mon, Nov 25, 2013 at 11:40 AM, yuva...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Yuvaraj Venkatesh yuva...@multicorewareinc.com
 # Date 1385359751 -19800
 #  Mon Nov 25 11:39:11 2013 +0530
 # Node ID 90a80def0f1aabdf29e1f08dd0f2263d8e6af805
 # Parent  c0c862dc71fbd021efd3922de99da4f2f93e81f4
 cleanup: removed unused code in pixel-a.asm

 diff -r c0c862dc71fb -r 90a80def0f1a source/common/x86/pixel-a.asm
 --- a/source/common/x86/pixel-a.asm Sun Nov 24 17:34:12 2013 +0800
 +++ b/source/common/x86/pixel-a.asm Mon Nov 25 11:39:11 2013 +0530
 @@ -7157,173 +7157,6 @@
  %endif ; !ARCH_X86_64
  %endmacro ; SA8D


 -;=
 -; SA8D_SATD

 -;=
 -
 -; %1: vertical/horizontal mode
 -; %2-%5: sa8d output regs (m0,m1,m2,m3,m4,m5,m8,m9)
 -; m10: satd result
 -; m6, m11-15: tmp regs
 -%macro SA8D_SATD_8x4 5
 -%if %1
 -LOAD_DIFF_8x4P %2, %3, %4, %5, 6, 11, 7, r0, r2, 1
 -HADAMARD   0, sumsub, %2, %3, 6
 -HADAMARD   0, sumsub, %4, %5, 6
 -SBUTTERFLYwd, %2, %3, 6
 -SBUTTERFLYwd, %4, %5, 6
 -HADAMARD2_2D  %2, %4, %3, %5, 6, dq
 -
 -mova   m12, m%2
 -mova   m13, m%3
 -mova   m14, m%4
 -mova   m15, m%5
 -HADAMARD 0, sumsub, %2, %3, 6
 -HADAMARD 0, sumsub, %4, %5, 6
 -SBUTTERFLY qdq, 12, 13, 6
 -HADAMARD   0, amax, 12, 13, 6
 -SBUTTERFLY qdq, 14, 15, 6
 -paddw m10, m12
 -HADAMARD   0, amax, 14, 15, 6
 -paddw m10, m14
 -%else
 -LOAD_SUMSUB_8x4P %2, %3, %4, %5, 6, 11, 7, r0, r2, 1
 -HADAMARD4_V %2, %3, %4, %5, 6
 -
 -pabswm12, m%2 ; doing the abs first is a slight advantage
 -pabswm14, m%4
 -pabswm13, m%3
 -pabswm15, m%5
 -HADAMARD 1, max, 12, 14, 6, 11
 -paddwm10, m12
 -HADAMARD 1, max, 13, 15, 6, 11
 -paddwm10, m13
 -%endif
 -%endmacro ; SA8D_SATD_8x4
 -
 -; %1: add spilled regs?
 -; %2: spill regs?
 -%macro SA8D_SATD_ACCUM 2
 -%if HIGH_BIT_DEPTH
 -pmaddwd m10, [pw_1]
 -HADDUWD  m0, m1
 -%if %1
 -paddd   m10, temp1
 -padddm0, temp0
 -%endif
 -%if %2
 -mova  temp1, m10
 -pxorm10, m10
 -%endif
 -%elif %1
 -paddwm0, temp0
 -%endif
 -%if %2
 -mova  temp0, m0
 -%endif
 -%endmacro
 -
 -%macro SA8D_SATD 0
 -%define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH)
 -cglobal pixel_sa8d_satd_8x8_internal
 -SA8D_SATD_8x4 vertical, 0, 1, 2, 3
 -SA8D_SATD_8x4 vertical, 4, 5, 8, 9
 -
 -%if vertical ; sse2-style
 -HADAMARD2_2D 0, 4, 2, 8, 6, qdq, amax
 -HADAMARD2_2D 1, 5, 3, 9, 6, qdq, amax
 -%else; complete sa8d
 -SUMSUB_BADC w, 0, 4, 1, 5, 12
 -HADAMARD 2, sumsub, 0, 4, 12, 11
 -HADAMARD 2, sumsub, 1, 5, 12, 11
 -SUMSUB_BADC w, 2, 8, 3, 9, 12
 -HADAMARD 2, sumsub, 2, 8, 12, 11
 -HADAMARD 2, sumsub, 3, 9, 12, 11
 -HADAMARD 1, amax, 0, 4, 12, 11
 -HADAMARD 1, amax, 1, 5, 12, 4
 -HADAMARD 1, amax, 2, 8, 12, 4
 -HADAMARD 1, amax, 3, 9, 12, 4
 -%endif
 -
 -; create sa8d sub results
 -paddwm1, m2
 -paddwm0, m3
 -paddwm0, m1
 -
 -SAVE_MM_PERMUTATION
 -ret
 -

 -;---
 -; uint64_t pixel_sa8d_satd_16x16( pixel *, intptr_t, pixel *, intptr_t )

 -;---
 -cglobal pixel_sa8d_satd_16x16, 4,8-(mmsize/32),16,SIZEOF_PIXEL*mmsize
 -%define temp0 [rsp+0*mmsize]
 -%define temp1 [rsp+1*mmsize]
 -FIX_STRIDES r1, r3
 -%if vertical==0
 -mova m7, [hmul_8p]
 -%endif
 -lea  r4, [3*r1]
 -lea  r5, [3*r3]
 -pxorm10, m10
 -
 -%if mmsize==32
 -call pixel_sa8d_satd_8x8_internal
 -SA8D_SATD_ACCUM 0, 1
 -call pixel_sa8d_satd_8x8_internal
 -SA8D_SATD_ACCUM 1, 0
 -vextracti128 xm1, m0, 1
 -vextracti128 xm2, m10, 1
 -paddw   xm0, xm1
 -paddw  xm10, xm2
 -%else
 -lea  r6, [r2+8*SIZEOF_PIXEL]
 -lea  r7, [r0+8*SIZEOF_PIXEL]
 -
 -call pixel_sa8d_satd_8x8_internal
 -SA8D_SATD_ACCUM 0, 1
 -call pixel_sa8d_satd_8x8_internal
 -SA8D_SATD_ACCUM 1, 1
 -
 -mov  r0, r7
 -mov  r2, r6
 -
 -call pixel_sa8d_satd_8x8_internal
 -SA8D_SATD_ACCUM 1, 1
 -call pixel_sa8d_satd_8x8_internal
 -SA8D_SATD_ACCUM 1, 0
 -%endif
 -
 -; xop already has fast horizontal sums
 -%if cpuflag(sse4)  notcpuflag(xop)  HIGH_BIT_DEPTH==0
 -pmaddwd xm10, [pw_1]
 -HADDUWD xm0, xm1
 -phaddd  xm0, xm10   ;  sa8d1  sa8d2  satd1  satd2
 -pshufd  xm1, xm0, q2301 ;  sa8d2  sa8d1  satd2  satd1
 -paddd   xm0, xm1;   sa8d   sa8d   satd   satd
 -movdr0d, xm0
 -pextrd  eax, xm0, 2
 -%else
 -%if HIGH_BIT_DEPTH
 -HADDD   xm0, xm1
 -HADDD  xm10, xm2
 -%else
 -HADDUW  xm0, xm1

Re: [x265] [PATCH] RD merge and cost fixes: use sa8d_inter, add early-skip param that was missed in earlier commit

2013-11-28 Thread Deepthi Nandakumar
Reverting the sa8d_inter changes. This block always uses square CUs, so
sa8d primitives are sufficient.

# HG changeset patch
# User Deepthi Nandakumar deep...@multicorewareinc.com
# Date 1385631244 -19800
# Node ID 2ba6c26c9febdc8c57d3014c0cf98d4897d3992d
# Parent  ca8c57f0c53248a36db6d04639c39ac0e2829fcd
RD merge: add in early-skip param.

diff -r ca8c57f0c532 -r 2ba6c26c9feb source/encoder/compress.cpp
--- a/source/encoder/compress.cppThu Nov 28 13:52:19 2013 +0530
+++ b/source/encoder/compress.cppThu Nov 28 15:04:04 2013 +0530
@@ -420,7 +420,7 @@
 /* Compute  Merge Cost */
 xComputeCostMerge2Nx2N(m_bestMergeCU[depth], m_mergeCU[depth],
m_modePredYuv[3][depth], m_bestMergeRecoYuv[depth]);

-if (!m_bestMergeCU[depth]-isSkipped(0))
+if (!(m_cfg-param.bEnableEarlySkip 
m_bestMergeCU[depth]-isSkipped(0)))
 {
 /*Compute 2Nx2N mode costs*/
 {



On Thu, Nov 28, 2013 at 2:54 PM, deep...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Deepthi Nandakumar deep...@multicorewareinc.com
 # Date 1385630646 -19800
 # Node ID 4f0b72baee90e9cf63ab2015b65f3d820c87d129
 # Parent  ca8c57f0c53248a36db6d04639c39ac0e2829fcd
 RD merge and cost fixes: use sa8d_inter, add early-skip param that was
 missed in earlier commit.

 diff -r ca8c57f0c532 -r 4f0b72baee90 source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Thu Nov 28 13:52:19 2013 +0530
 +++ b/source/encoder/compress.cpp   Thu Nov 28 14:54:06 2013 +0530
 @@ -211,7 +211,7 @@
  outTempCU-m_totalBits = 0;
  m_search-predInterSearch(outTempCU, outPredYuv, bUseMRG, true,
 false);
  int part = g_convertToBit[outTempCU-getWidth(0)];
 -uint32_t distortion =
 primitives.sa8d[part](m_origYuv[depth]-getLumaAddr(),
 m_origYuv[depth]-getStride(),
 +uint32_t distortion =
 primitives.sa8d_inter[part](m_origYuv[depth]-getLumaAddr(),
 m_origYuv[depth]-getStride(),

  outPredYuv-getLumaAddr(), outPredYuv-getStride());
  outTempCU-m_totalCost = m_rdCost-calcRdSADCost(distortion,
 outTempCU-m_totalBits);
  }
 @@ -420,7 +420,7 @@
  /* Compute  Merge Cost */
  xComputeCostMerge2Nx2N(m_bestMergeCU[depth],
 m_mergeCU[depth], m_modePredYuv[3][depth], m_bestMergeRecoYuv[depth]);

 -if (!m_bestMergeCU[depth]-isSkipped(0))
 +if (!(m_cfg-param.bEnableEarlySkip 
 m_bestMergeCU[depth]-isSkipped(0)))
  {
  /*Compute 2Nx2N mode costs*/
  {

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] log: output intra type statistics of I frame, bug fix in intra percentage calculation

2013-11-30 Thread Deepthi Nandakumar
Build fails.


On Thu, Nov 28, 2013 at 11:10 AM, kavi...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Kavitha Sampath kavi...@multicorewareinc.com
 # Date 1385616934 -19800
 #  Thu Nov 28 11:05:34 2013 +0530
 # Branch stable
 # Node ID 8519dc4a5b9e53f1ed6f2f52294d7caea2803bc3
 # Parent  f92e0c49a9f0a0e6f6db3eb247bd04431eb75b1e
 log: output intra type statistics of I frame, bug fix in intra percentage
 calculation

 diff -r f92e0c49a9f0 -r 8519dc4a5b9e source/Lib/TLibEncoder/TEncCu.cpp
 --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Nov 27 20:50:08 2013 -0600
 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Thu Nov 28 11:05:34 2013 +0530
 @@ -551,6 +551,11 @@
  }

  outTempCU-initEstData(depth, qp);
 +if (depth  g_maxCUDepth - 2)
 +{
 +memcpy(m_log-tempIntra[depth], m_log-cntIntra,
 sizeof(m_log-cntIntra));
 +memcpy(m_log-tempIntraDist[depth], m_log-cuIntraDistribution,
 sizeof(m_log-cuIntraDistribution));
 +}

  // further split
  if (bSubBranch  bTrySplitDQP  depth  g_maxCUDepth - g_addCUDepth)
 @@ -575,19 +580,9 @@

  
 m_rdSbacCoders[nextDepth][CI_CURR_BEST]-load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
  }

 -// The following if condition has to be commented out in
 case the early Abort based on comparison of parentCu cost, childCU cost is
 not required.
 -if (outBestCU-isIntra(0))
 -{
 -xCompressIntraCU(subBestPartCU[partUnitIdx],
 subTempPartCU[partUnitIdx], nextDepth);
 -}
 -else
 -{
 -xCompressIntraCU(subBestPartCU[partUnitIdx],
 subTempPartCU[partUnitIdx], nextDepth);
 -}
 -{
 -outTempCU-copyPartFrom(subBestPartCU[partUnitIdx],
 partUnitIdx, nextDepth); // Keep best part data to current temporary data.
 -
  xCopyYuv2Tmp(subBestPartCU[partUnitIdx]-getTotalNumPart() * partUnitIdx,
 nextDepth);
 -}
 +xCompressIntraCU(subBestPartCU[partUnitIdx],
 subTempPartCU[partUnitIdx], nextDepth);
 +outTempCU-copyPartFrom(subBestPartCU[partUnitIdx],
 partUnitIdx, nextDepth); // Keep best part data to current temporary data.
 +
  xCopyYuv2Tmp(subBestPartCU[partUnitIdx]-getTotalNumPart() * partUnitIdx,
 nextDepth);
  }
  else if (bInSlice)
  {
 @@ -635,28 +630,39 @@

  
 m_rdSbacCoders[nextDepth][CI_NEXT_BEST]-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
  if (outBestCU-m_totalCost  outTempCU-m_totalCost)
  {
 +if (depth  g_maxCUDepth - 2)
 +{
 +memcpy(m_log-cntIntra, m_log-tempIntra[depth],
 sizeof(m_log-tempIntra[depth]));
 +memcpy(m_log-cuIntraDistribution,
 m_log-tempIntraDist[depth], sizeof(m_log-tempIntraDist[depth]));
 +}


I'm uncomfortable seeing memcpy's in the innermost CU-analysis loop. Why is
this necessary?


  m_log-cntIntra[depth]++;
 -for (int i = 0; i  4; i++)
 +if (outBestCU-getLumaIntraDir()[0]  1)
 +m_log-cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
 +else
 +
  m_log-cuIntraDistribution[depth][outBestCU-getLumaIntraDir()[0]]++;
 +}
 +else
 +{
 +if (depth == g_maxCUDepth - 2)
  {
 -if (outTempCU-getPartitionSize(i) != SIZE_NxN)
 -m_log-cntIntra[depth + 1]--;
 -else
 -m_log-cntIntraNxN--;
 +for (int i = 0; i  16; i = i + 4)
 +{
 +if (outTempCU-getPartitionSize(i) != SIZE_NxN)
 +{
 +m_log-cntIntra[depth + 1]++;
 +if (outTempCU-getLumaIntraDir()[i]  1)
 +m_log-cuIntraDistribution[depth +
 1][ANGULAR_MODE_ID]++;
 +else
 +m_log-cuIntraDistribution[depth +
 1][outTempCU-getLumaIntraDir()[i]]++;
 +}
 +else
 +m_log-cntIntraNxN++;
 +}
  }
 -m_log-cntIntra[depth + 1] += boundaryCu;
  }
  xCheckBestMode(outBestCU, outTempCU, depth); // RD compare
 current prediction with split prediction.
  }

 -if (depth == g_maxCUDepth - 1  bSubBranch)
 -{
 -if (outBestCU-getPartitionSize(0) == SIZE_NxN)
 -{
 -m_log-cntIntraNxN++;
 -}
 -else
 -m_log-cntIntra[depth]++;
 -}
  outBestCU-copyToPic(depth); // Copy Best data to Picture for next
 partition prediction.

  // Copy Yuv data to picture Yuv
 diff -r f92e0c49a9f0 -r 8519dc4a5b9e source/Lib/TLibEncoder/TEncCu.h
 --- a/source/Lib/TLibEncoder/TEncCu.h   Wed Nov 27 20:50:08 2013 -0600
 +++ b/source/Lib/TLibEncoder/TEncCu.h   Thu Nov 28 11:05:34 2013 +0530
 @@ -66,6 +66,8 @@
  

Re: [x265] [PATCH] aq: bug fix for hash mismatch between recon with decoded output

2013-12-03 Thread Deepthi Nandakumar
Pushed. So what are the latest results for different sequences on enabling
aq-mode?


On Tue, Dec 3, 2013 at 4:31 PM, Aarthi Thirumalai 
aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1386068495 -19800
 #  Tue Dec 03 16:31:35 2013 +0530
 # Node ID 660ec2c027982db73366560ca8f600e5d86cc2e3
 # Parent  86d23688b0174e06f3949c81ac182ba3e83908d1
 aq: bug fix for hash mismatch between recon with decoded output

 diff -r 86d23688b017 -r 660ec2c02798 source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Tue Dec 03 11:24:15 2013 +0530
 +++ b/source/encoder/compress.cpp   Tue Dec 03 16:31:35 2013 +0530
 @@ -74,6 +74,7 @@

  cu-m_totalBits = m_entropyCoder-getNumberOfWrittenBits();
  cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion,
 cu-m_totalBits);
 +xCheckDQP(cu);
  }

  void TEncCu::xComputeCostIntraInInter(TComDataCU* cu, PartSize partSize)
 @@ -302,6 +303,7 @@

  //No-residue mode
  m_search-encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
 bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth],
 m_tmpRecoYuv[depth], true);
 +xCheckDQP(outTempCU);

  tmp = outTempCU;
  outTempCU = outBestCU;
 @@ -313,6 +315,7 @@

  //Encode with residue
  m_search-estimateRDInterCU(outTempCU, m_origYuv[depth], bestPredYuv,
 m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false);
 +xCheckDQP(outTempCU);

  if (outTempCU-m_totalCost  outBestCU-m_totalCost)//Choose best
 from no-residue mode and residue mode
  {
 @@ -485,6 +488,7 @@

  m_search-estimateRDInterCU(outBestCU, m_origYuv[depth],
 m_bestPredYuv[depth], m_tmpResiYuv[depth],
  m_bestResiYuv[depth],
 m_bestRecoYuv[depth], false);
 +xCheckDQP(outBestCU);

  if (m_bestMergeCU[depth]-m_totalCost 
 outBestCU-m_totalCost)
  {
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] rd level: use cu coeff array while computing coefficients instead of temp buffer

2013-12-16 Thread Deepthi Nandakumar
Build fails. Restore qtlayer.


On Mon, Dec 16, 2013 at 3:07 PM, deepthidev...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Deepthi Devaki deepthidev...@multicorewareinc.com
 # Date 1387185624 -19800
 # Node ID f20e5ab835cd7071d9ebeabff50f6b9fef4d3e39
 # Parent  9bb16a023918c342b907d106b4a3d59ec2473bc1
 rd level: use cu coeff array while computing coefficients instead of temp
 buffer

 diff -r 9bb16a023918 -r f20e5ab835cd source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Dec 16 14:33:57 2013
 +0530
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Dec 16 14:50:24 2013
 +0530
 @@ -3266,9 +3266,7 @@
  if (cu-getPredictionMode(0) == MODE_INTER)
  {
  residualTransformQuantInter(cu, 0, 0, resiYuv, cu-getDepth(0),
 true);
 -xSetResidualQTData(cu, 0, 0, NULL, cu-getDepth(0), false);
  uint32_t width  = cu-getWidth(0);
 -xSetResidualQTData(cu, 0, 0, resiYuv, cu-getDepth(0), true);
  reconYuv-addClip(predYuv, resiYuv, 0, width);

  if (cu-getMergeFlag(0)  cu-getPartitionSize(0) == SIZE_2Nx2N
  cu-getQtRootCbf(0) == 0)
 @@ -3322,10 +3320,10 @@
  if (bCheckFull)
  {
  const uint32_t numCoeffPerAbsPartIdxIncrement =
 cu-getSlice()-getSPS()-getMaxCUWidth() *
 cu-getSlice()-getSPS()-getMaxCUHeight() 
 (cu-getSlice()-getSPS()-getMaxCUDepth()  1);
 -const uint32_t qtlayer =
 cu-getSlice()-getSPS()-getQuadtreeTULog2MaxSize() - trSizeLog2;
 -TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] +
 (numCoeffPerAbsPartIdxIncrement * absPartIdx);
 -TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] +
 (numCoeffPerAbsPartIdxIncrement * absPartIdx  2);
 -TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] +
 (numCoeffPerAbsPartIdxIncrement * absPartIdx  2);
 +
 +TCoeff *coeffCurY = cu-getCoeffY() +
 (numCoeffPerAbsPartIdxIncrement * absPartIdx);
 +TCoeff *coeffCurU = cu-getCoeffCb() +
 (numCoeffPerAbsPartIdxIncrement * absPartIdx  2);
 +TCoeff *coeffCurV = cu-getCoeffCr() +
 (numCoeffPerAbsPartIdxIncrement * absPartIdx  2);

  int trWidth = 0, trHeight = 0, trWidthC = 0, trHeightC = 0;
  uint32_t absTUPartIdxC = absPartIdx;
 @@ -3370,64 +3368,55 @@

  if (absSumY)
  {
 -int16_t *curResiY =
 m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx);
 +int16_t *curResiY = resiYuv-getLumaAddr(absTUPartIdx);

  m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA,
 cu-getSlice()-getSPS()-getQpBDOffsetY(), 0);

  int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
  assert(scalingListType  6);
 -assert(m_qtTempTComYuv[qtlayer].m_width == MAX_CU_SIZE);
 -
  m_trQuant-invtransformNxN(cu-getCUTransquantBypass(absPartIdx), REG_DCT,
 curResiY, MAX_CU_SIZE,  coeffCurY, trWidth, trHeight, scalingListType,
 false, lastPosY); //this is for inter mode only
 +
  m_trQuant-invtransformNxN(cu-getCUTransquantBypass(absPartIdx), REG_DCT,
 curResiY, resiYuv-m_width,  coeffCurY, trWidth, trHeight, scalingListType,
 false, lastPosY); //this is for inter mode only
  }
  else
  {
 -int16_t *ptr =
  m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx);
 -assert(m_qtTempTComYuv[qtlayer].m_width == MAX_CU_SIZE);
 -
 +int16_t *ptr =  resiYuv-getLumaAddr(absTUPartIdx);
  assert(trWidth == trHeight);
 -primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr,
 MAX_CU_SIZE, 0);
 +primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr,
 resiYuv-m_width, 0);
  }

  if (bCodeChroma)
  {
  if (absSumU)
  {
 -int16_t *pcResiCurrU =
 m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
 +int16_t *pcResiCurrU = resiYuv-getCbAddr(absTUPartIdxC);

  int curChromaQpOffset =
 cu-getSlice()-getPPS()-getChromaCbQpOffset() +
 cu-getSlice()-getSliceQpDeltaCb();
  m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA,
 cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset);

  int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
  assert(scalingListType  6);
 -assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
 2);
 -
  m_trQuant-invtransformNxN(cu-getCUTransquantBypass(absPartIdx), REG_DCT,
 pcResiCurrU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC,
 scalingListType, false, lastPosU);
 +
  m_trQuant-invtransformNxN(cu-getCUTransquantBypass(absPartIdx), REG_DCT,
 pcResiCurrU, resiYuv-m_cwidth, coeffCurU, trWidthC, trHeightC,
 scalingListType, false, lastPosU);
  }
  else
  {
 -int16_t *ptr =
 m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
 -assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
 2);
 -
 +int16_t *ptr = resiYuv-getCbAddr(absTUPartIdxC);
  

Re: [x265] [PATCH] rc: Calculate the variance data for weighted prediction if aq-mode is disable

2013-12-16 Thread Deepthi Nandakumar
Am I missing something here? You have added code in the else part - if(AQ
is disabled), we are already calculating variance for weightP.


On Mon, Dec 16, 2013 at 5:43 PM, Gopu Govindaswamy 
g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1387195927 -19800
 # Node ID bdc7afa5ba5afee7113ac0d427fbfc660359716d
 # Parent  3dae450a06a4b390ce6fd947d5095b739b01c6d8
 rc: Calculate the variance data for weighted prediction if aq-mode is
 disable

 diff -r 3dae450a06a4 -r bdc7afa5ba5a source/encoder/ratecontrol.cpp
 --- a/source/encoder/ratecontrol.cppMon Dec 16 13:57:58 2013 +0530
 +++ b/source/encoder/ratecontrol.cppMon Dec 16 17:42:07 2013 +0530
 @@ -116,6 +116,15 @@
  for (int cuxy = 0; cuxy  cuCount; cuxy++ )
  pic-m_lowres.invQscaleFactor[cuxy] = 256;
  }
 +
 + /* Need variance data for weighted prediction */
 +if (cfg-param.bEnableWeightedPred)
 +{
 +for (int cuy = 0; cuy  maxRow; cuy++ )
 +for (int cux = 0; cux  maxCol; cux++ )
 +acEnergyCu(pic, cux, cuy);
 +}
 +
  }
  else
  {
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] rc: Calculate the variance data for weighted prediction if aq-mode is disable

2013-12-16 Thread Deepthi Nandakumar
Ughh - there's something wrong with my hg import. All clear now.


On Mon, Dec 16, 2013 at 10:34 PM, Deepthi Nandakumar 
deep...@multicorewareinc.com wrote:

 Am I missing something here? You have added code in the else part - if(AQ
 is disabled), we are already calculating variance for weightP.


 On Mon, Dec 16, 2013 at 5:43 PM, Gopu Govindaswamy 
 g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1387195927 -19800
 # Node ID bdc7afa5ba5afee7113ac0d427fbfc660359716d
 # Parent  3dae450a06a4b390ce6fd947d5095b739b01c6d8
 rc: Calculate the variance data for weighted prediction if aq-mode is
 disable

 diff -r 3dae450a06a4 -r bdc7afa5ba5a source/encoder/ratecontrol.cpp
 --- a/source/encoder/ratecontrol.cppMon Dec 16 13:57:58 2013 +0530
 +++ b/source/encoder/ratecontrol.cppMon Dec 16 17:42:07 2013 +0530
 @@ -116,6 +116,15 @@
  for (int cuxy = 0; cuxy  cuCount; cuxy++ )
  pic-m_lowres.invQscaleFactor[cuxy] = 256;
  }
 +
 + /* Need variance data for weighted prediction */
 +if (cfg-param.bEnableWeightedPred)
 +{
 +for (int cuy = 0; cuy  maxRow; cuy++ )
 +for (int cux = 0; cux  maxCol; cux++ )
 +acEnergyCu(pic, cux, cuy);
 +}
 +
  }
  else
  {
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] rc: Calculate the variance data for weighted prediction if aq-mode is disable

2013-12-16 Thread Deepthi Nandakumar
What about the variance value returned by acEnergyCu?


On Mon, Dec 16, 2013 at 10:43 PM, Deepthi Nandakumar 
deep...@multicorewareinc.com wrote:

 Ughh - there's something wrong with my hg import. All clear now.


 On Mon, Dec 16, 2013 at 10:34 PM, Deepthi Nandakumar 
 deep...@multicorewareinc.com wrote:

 Am I missing something here? You have added code in the else part - if(AQ
 is disabled), we are already calculating variance for weightP.


 On Mon, Dec 16, 2013 at 5:43 PM, Gopu Govindaswamy 
 g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1387195927 -19800
 # Node ID bdc7afa5ba5afee7113ac0d427fbfc660359716d
 # Parent  3dae450a06a4b390ce6fd947d5095b739b01c6d8
 rc: Calculate the variance data for weighted prediction if aq-mode is
 disable

 diff -r 3dae450a06a4 -r bdc7afa5ba5a source/encoder/ratecontrol.cpp
 --- a/source/encoder/ratecontrol.cppMon Dec 16 13:57:58 2013 +0530
 +++ b/source/encoder/ratecontrol.cppMon Dec 16 17:42:07 2013 +0530
 @@ -116,6 +116,15 @@
  for (int cuxy = 0; cuxy  cuCount; cuxy++ )
  pic-m_lowres.invQscaleFactor[cuxy] = 256;
  }
 +
 + /* Need variance data for weighted prediction */
 +if (cfg-param.bEnableWeightedPred)
 +{
 +for (int cuy = 0; cuy  maxRow; cuy++ )
 +for (int cux = 0; cux  maxCol; cux++ )
 +acEnergyCu(pic, cux, cuy);
 +}
 +
  }
  else
  {
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel




___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] Warnings during build - on Mac OS X 10.6.8

2013-12-17 Thread Deepthi Nandakumar
Thanks, should be fixed now.


On Tue, Dec 17, 2013 at 4:08 AM, Selur hyb...@selur.de wrote:

 -BEGIN PGP SIGNED MESSAGE-
 Hash: SHA256

 Nothing serious, just wanted to note that building on Mac OS throws
 some warnings.

 /Users/selur/x265/source/Lib/TLibCommon/TComSlice.h: In constructor
 ‘x265::TComSlice::TComSlice()’:
 /Users/selur/x265/source/Lib/TLibCommon/TComSlice.h:1359: warning:
 ‘x265::TComSlice::m_avgQpRc’ will be initialized after
 /Users/selur/x265/source/Lib/TLibCommon/TComSlice.h:1326: warning:
 ‘x265::TComSPS* x265::TComSlice::m_sps’
 /Users/selur/x265/source/Lib/TLibCommon/TComSlice.cpp:49: warning:
 when initialized here

 /Users/selur/x265/source/common/vec/vec-primitives.cpp:66: warning:
 unused parameter ‘p’
 /Users/selur/x265/source/common/vec/vec-primitives.cpp:66: warning:
 unused parameter ‘cpuMask’

 Cu Selur
 -BEGIN PGP SIGNATURE-
 Version: GnuPG v2.0.22 (MingW32)

 iQIcBAEBCAAGBQJSr4DhAAoJEJA5OiupSg93vskP/RUJZ22YyAJ3rN2G2cfz5KBc
 YQwo4aQQH10VtNOKriXMaD5U5ty7DBNJCKY5D1OPv7xpGQdTJVW5D6ClxDhYVTM2
 WbvzONjPfLf8Fq+tKfS1pSnOtYUKeanpjl/GZh6I39XMRYC4XwK3c5AYSBB6c1Nx
 oRx+X3tPKXRYB3g5l988Lt8oQjD+fNpcvHhqOWn2GzRAD7n3zwp5ekZTAsz1DjPP
 EF9xMIngLLhyY/hiEALs661FirZSAqRHKtGWlcriovX2lXC1cFdI1r2LS428e2ND
 B9V56w0pFEnWpD1n2N7sM4p6keFfb9isSxZ3hB/DvN1qOM3cMx0UXuzCXa7fNKNN
 1BTyopaZwRpWZDSHouItiPUTpkPLBpHYqifbhRD5XmFuOX+gkO47z5VL+9hKnhWQ
 YMRhuU5zsmb7epTJaVsf+MQWd2R2UWuBKfgx799AVVU94ls3rHd1cPzP5KJZTW1m
 4KkNJ1Pcl+smzUaPL/GtFKyw4uNP8B8MFYSXV5T4E6RqZay4NYeDk312k4xdGg/Q
 zKzUXBeQrYdyogspbWt3x1xlgox9aP6pWOMkIxchJETDzYt2ll5z9q14SMi88tm7
 bafsCOK1moxrJkBRz6zutJ97AJGtDaEt7qu0Pu1bfFsUIMkhf/eq2pOR3zoKzaBU
 oeVWgXy6N0bxfcnzVvGG
 =aX90
 -END PGP SIGNATURE-
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] cutree: bug fixes. correct the timescale used in getQScale()

2013-12-27 Thread Deepthi Nandakumar
On Fri, Dec 27, 2013 at 11:07 PM, Aarthi Thirumalai 
aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1388165826 -19800
 #  Fri Dec 27 23:07:06 2013 +0530
 # Node ID 964e5bc90ad2a0f80980409046a13b4cbaf11a03
 # Parent  8b5c5fe7fbc923684af45e5ff7a0ed5ed6e83db9
 cutree: bug fixes. correct the timescale used in getQScale()

 diff -r 8b5c5fe7fbc9 -r 964e5bc90ad2 source/encoder/ratecontrol.cpp
 --- a/source/encoder/ratecontrol.cppFri Dec 27 15:30:41 2013 +0530
 +++ b/source/encoder/ratecontrol.cppFri Dec 27 23:07:06 2013 +0530
 @@ -633,10 +633,11 @@

  if (cfg-param.rc.cuTree)
  {
 -double scale =
 curSlice-getSPS()-getVuiParameters()-getTimingInfo()-getTimeScale();
 -double units =
 curSlice-getSPS()-getVuiParameters()-getTimingInfo()-getNumUnitsInTick();
 -double timescale = units / scale;
 -q = pow(BASE_FRAME_DURATION / CLIP_DURATION(frameDuration *
 timescale), 1 - cfg-param.rc.qCompress);
 +// Scale and units are obtained from rateNum and rateDenom for
 videos with fixed frame rates.
 +double scale = cfg-param.frameRate * 2;
 +double numTicks = 1;
 +double timescale = numTicks / scale;
 +q = pow(BASE_FRAME_DURATION / CLIP_DURATION(2 * timescale), 1 -
 cfg-param.rc.qCompress);


Good catch. Currently, these SPS/VUI parameters aren't even set.

 }
  else
  q = pow(rce-blurredComplexity, 1 - cfg-param.rc.qCompress);
 diff -r 8b5c5fe7fbc9 -r 964e5bc90ad2 source/encoder/slicetype.cpp
 --- a/source/encoder/slicetype.cpp  Fri Dec 27 15:30:41 2013 +0530
 +++ b/source/encoder/slicetype.cpp  Fri Dec 27 23:07:06 2013 +0530
 @@ -1394,7 +1394,7 @@
  memset(Frames[b]-propagateCost, 0, widthInCU * sizeof(uint16_t));

  uint16_t StrideInCU = (uint16_t)widthInCU;
 -for (uint16_t block_y = 0; block_y  heightInCU; block_y += 16)
 +for (uint16_t block_y = 0; block_y  heightInCU; block_y++)
  {
  int cuIndex = block_y * StrideInCU;
  /* TODO This function go into ASM */
 @@ -1404,7 +1404,7 @@

  if (referenced)
  propagate_cost += widthInCU;
 -for (uint16_t block_x = 0; block_x  widthInCU; block_x += 16,
 cuIndex++)
 +for (uint16_t block_x = 0; block_x  widthInCU; block_x++,
 cuIndex++)
  {
  int propagate_amount = scratch[block_x];
  /* Don't propagate for an intra block. */
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Modifications to address review comments to support multiple color space format

2014-01-11 Thread Deepthi Nandakumar
This is a CMake + VS configuration issue. A quick Google search - please
read the following:

http://stackoverflow.com/questions/2849517/linking-problem-fatal-error-lnk1112-module-machine-type-x64-conflicts-with-t


On Sat, Jan 11, 2014 at 12:28 AM, Purvin Pandit purv...@hotmail.com wrote:

 I tried to compile x265 under Windows with VS and I get the following errors.
 Any suggestions?

 Error 5 error LNK1112: module machine type 'X86' conflicts with target
 machine type 'x64'
 X:\x265\multicoreware-x265-b970ffbdd696\build\vc10-x86_64\pixel-a.asm.obj
 1 x265-shared

 Error 6 error LNK1112: module machine type 'x64' conflicts with target
 machine type 'X86'
 X:\x265\multicoreware-x265-b970ffbdd696\build\vc10-x86_64\encoder\encoder.dir\Release\NALwrite.obj
 1 x265-static

 Error 7 error LNK1181: cannot open input file 'Release\x265-static.lib'
 X:\x265\multicoreware-x265-b970ffbdd696\build\vc10-x86_64\LINK cli

 Thanks,
 -Purvin

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] rd Level: improve bitrate and psnr in rd level 2

2014-01-14 Thread Deepthi Nandakumar
On Fri, Jan 10, 2014 at 2:10 PM, sumala...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Sumalatha Polureddy
 # Date 1389343212 -19800
 # Node ID 104fb077a9813f0f3bb94c370e134d0d6d180809
 # Parent  80b63c3ee144e6edbafbbe281ad3d1d8505be1f6
 rd Level: improve bitrate and psnr in rd level 2

 a. Always allow intra mode for mode decision
 b. increase more merge skips at each depth

 Results are below
 CLI: input.y4m -o bitstream.hevc -r recon.y4m --preset veryfast --rd 2

 rd3 / previous rd2 / latest rd2
 BasketballDrive_1920x1080_50
 fps: 4.24/5.61/5.7
 psnr: 33.67/33.95/33.677
 bitrate: 2211/2272/2188

 Kimono1_1920x1080_24
 fps: 5.45/6.11/6.1
 psnr: 35.004/35.144/34.835
 bitrate: 713/740/692

 FourPeople_1280x720_60
 fps: 7.99/10.86/11.46
 psnr: 34.93/35.17/34.931
 bitrate: 320/341/341

 big_buck_bunny_360p24
 fps: 38.98/49.32/44.46
 psnr: 33.21/33.36/33.167
 bitrate: 56/60/56

 Johnny_1280x720_60
 fps: 8.21/9.58/11.32
 psnr: 36.74/37.238/37.01
 bitrate: 203/204/206

 diff -r 80b63c3ee144 -r 104fb077a981 source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Thu Jan 09 12:50:16 2014 +0550
 +++ b/source/encoder/compress.cpp   Fri Jan 10 14:10:12 2014 +0530
 @@ -517,12 +517,6 @@
  bdoIntra = (outBestCU-getCbf(0, TEXT_LUMA) ||
  outBestCU-getCbf(0, TEXT_CHROMA_U) ||
  outBestCU-getCbf(0, TEXT_CHROMA_V));
  }
 -else
 -{
 -uint32_t threshold[4] = { 2, 6000, 1600, 500
 };
 -int index = 4 -
 g_convertToBit[outBestCU-getWidth(0)];
 -bdoIntra = (outBestCU-m_totalDistortion 
 threshold[index]);
 -}


This change is valid.


  if (bdoIntra)
  {
  xComputeCostIntraInInter(m_intraInInterCU[depth],
 SIZE_2Nx2N);
 @@ -587,8 +581,12 @@
  }

  m_search-encodeResAndCalcRdInterCU(m_mergeCU[depth], m_origYuv[depth],
 bestMergePred, m_tmpResiYuv[depth],

  m_bestResiYuv[depth], m_tmpRecoYuv[depth], true);
 -
 -if (m_mergeCU[depth]-m_totalCost 
 outBestCU-m_totalCost)
 +double lambda[4];
 +lambda[0] = 1.06;
 +lambda[1] = 1.5;
 +lambda[2] = 1.1;
 +lambda[3] = 1.0;


This part looks like an artificial way of forcing more skips. I'd like to
understand why merge-skips have reduced so much in the first place in rd 2,
and then fix the root cause.

+if (m_mergeCU[depth]-m_totalCost  lambda[depth]
 * outBestCU-m_totalCost)
  {
  outBestCU = m_mergeCU[depth];
  tempYuv = m_bestRecoYuv[depth];
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] Re:x265-devel Digest, Vol 8, Issue 1

2014-01-15 Thread Deepthi Nandakumar
Hello,

When open-GOP is disabled (i.e., closed GOP), IDR frames are used.

Deepthi


On Thu, Jan 16, 2014 at 7:44 AM, i...@sina.com wrote:

 Hi Deepthi, can open-GOP in x265 support random access into a video stream,
 like IDR does?




 1. Re: [PATCH] slicetype: remove --refresh and use
 --open-gop(default: enable) (Deepthi Nandakumar)
 --
 Message: 1
 Date: Fri, 3 Jan 2014 16:19:29 +0530
 From: Deepthi Nandakumar deep...@multicorewareinc.com
 To: Development for x265 x265-devel@videolan.org, Tom Vaughan
 tom.vaug...@multicorewareinc.com
 Subject: Re: [x265] [PATCH] slicetype: remove --refresh and use
 --open-gop(default: enable)
 Message-ID:
 caaeo3ugez1jaq2ta0hj4ycstrtoamw5zxc3tokv3bb3vtjy...@mail.gmail.com
 Content-Type: text/plain; charset=iso-8859-1
 We'd like to request any relevant feedback on this patch. Essentially,
 we're removing the refresh parameter, since it conflicts with the openGOP
 parameter.
 By default, an openGOP will be enabled, which means we could have both
 RADL/RASL following CRA pictures. The encoder could control the references,
 and thus ensure that all leading pictures are decodable (ie that they are
 always RADL). But IMO, this destroys the purpose of openGOP (?).
 Thoughts/opinions welcome.
 Thanks,
 Deepthi

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] asm : saturation bug fix for luma_vss asm routine

2014-01-30 Thread Deepthi Nandakumar
This patch is pending, right, Nabajit? I haven't pushed the luma_vss/chroma_vss
assembly patches or the testbench edits to luma_vss.




On Wed, Jan 29, 2014 at 1:56 PM, chen chenm...@163.com wrote:

 @@ -5105,8 +5108,9 @@
  pmaddwdm5, [r6 + 3 * 16]

  paddd  m1, m5  ;m1=[1+2+3+4+5+6+7+8]  Row2 
  end
  psrad  m1, 6
 -
 -packssdw   m0, m1
 +pand   m1, m7
 +
 +packusdw   m0, m1
 
  movlps [r2], m0
  movhps [r2 + r3], m0
 PAND + PACKUSDW may avoid the overflow problem, but it is the wrong fix here.
 As you said, you got a result value of 0x8D84, which overflows 16 bits, so
 we need to find the real reason.
 I checked the HM code and it uses Short, so I suggest you capture the input data
 and feed it into HM to check HM's output.


 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] testbench: stress test support for all pixelharness functions

2014-02-04 Thread Deepthi Nandakumar
Does not apply at the tip. Please pull and resend.


On Wed, Feb 5, 2014 at 12:31 PM, muru...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Murugan Vairavel muru...@multicorewareinc.com
 # Date 1391582491 -19800
 #  Wed Feb 05 12:11:31 2014 +0530
 # Node ID b690a5b68676e2b4800d5e0a490d04ee05b9435f
 # Parent  711827aaab063bb0e02aa6ae52cdd9e7b8b9fef4
 testbench: stress test support for all pixelharness functions

 diff -r 711827aaab06 -r b690a5b68676 source/test/pixelharness.cpp
 --- a/source/test/pixelharness.cpp  Tue Feb 04 13:00:44 2014 +0530
 +++ b/source/test/pixelharness.cpp  Wed Feb 05 12:11:31 2014 +0530
 @@ -33,39 +33,84 @@
  #define INCR   32
  #define STRIDE 64
  #define ITERS  100
 +#define MAX_HEIGHT 64
 +#define PAD_ROWS   64
 +#define BUFFSIZE STRIDE * (MAX_HEIGHT + PAD_ROWS) + INCR * ITERS
 +#define TEST_CASES 3
 +#define SMAX (1  12)
 +#define SMIN (-1  12)

  PixelHarness::PixelHarness()
  {
 -int maxheight = 64;
 -int padrows = 64;
 -int bufsize = STRIDE * (maxheight + padrows) + INCR * ITERS;
 +int bufsize = STRIDE * (MAX_HEIGHT + PAD_ROWS) + INCR * ITERS;

  /* 64 pixels wide, 2k deep */
 -pbuf1 = X265_MALLOC(pixel, bufsize);
 -pbuf2 = X265_MALLOC(pixel, bufsize);
 -pbuf3 = X265_MALLOC(pixel, bufsize);
 -pbuf4 = X265_MALLOC(pixel, bufsize);
 +pbuf1 = (pixel*)X265_MALLOC(pixel, bufsize);
 +pbuf2 = (pixel*)X265_MALLOC(pixel, bufsize);
 +pbuf3 = (pixel*)X265_MALLOC(pixel, bufsize);
 +pbuf4 = (pixel*)X265_MALLOC(pixel, bufsize);

 -ibuf1 = X265_MALLOC(int, bufsize);
 +ibuf1 = (int*)X265_MALLOC(int, bufsize);

 -sbuf1 = X265_MALLOC(int16_t, bufsize);
 -sbuf2 = X265_MALLOC(int16_t, bufsize);
 -sbuf3 = X265_MALLOC(int16_t, bufsize);
 +sbuf1 = (int16_t*)X265_MALLOC(int16_t, bufsize);
 +sbuf2 = (int16_t*)X265_MALLOC(int16_t, bufsize);
 +sbuf3 = (int16_t*)X265_MALLOC(int16_t, bufsize);

 -if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2 ||
 !sbuf3 || !ibuf1)
 +/*Test Case buffer array */
 +pixel_test_buff  = (pixel**)X265_MALLOC(pixel*, TEST_CASES);
 +short_test_buff  = (int16_t**)X265_MALLOC(int16_t*, TEST_CASES);
 +short_test_buff1 = (int16_t**)X265_MALLOC(int16_t*, TEST_CASES);
 +int_test_buff= (int**)X265_MALLOC(int*, TEST_CASES);
 +
 +if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2 ||
 !sbuf3 || !ibuf1 ||
 +!pixel_test_buff || !short_test_buff || !int_test_buff ||
 !short_test_buff1)
  {
  fprintf(stderr, malloc failed, unable to initiate tests!\n);
  exit(1);
  }

 +for (int i = 0; i  TEST_CASES; i++)
 +{
 +pixel_test_buff[i]  = (pixel*)X265_MALLOC(pixel, BUFFSIZE);
 +short_test_buff[i]  = (int16_t*)X265_MALLOC(int16_t, BUFFSIZE);
 +short_test_buff1[i] = (int16_t*)X265_MALLOC(int16_t, BUFFSIZE);
 +int_test_buff[i]= (int*)X265_MALLOC(int, BUFFSIZE);
 +if (!pixel_test_buff[i] || !short_test_buff[i] ||
 !int_test_buff[i] || !short_test_buff1[i])
 +{
 +fprintf(stderr, Init_Test_Case_buffers: malloc failed,
 unable to initiate tests!\n);
 +exit(-1);
 +}
 +}
 +
 +/*[0] --- Random values  */
 +/*[1] --- Minimum*/
 +/*[2] --- Maximum*/
 +
 +for (int i = 0; i  BUFFSIZE; i++)
 +{
 +pixel_test_buff[0][i]   = rand() % PIXEL_MAX;
 +short_test_buff[0][i]   = (rand() % (2 * SMAX + 1)) - SMAX - 1;
 //max(SHORT_MIN, min(rand(), SMAX));
 +short_test_buff1[0][i]  = rand()  PIXEL_MAX;
  //For block copy only
 +int_test_buff[0][i] = rand() % INT32_MAX;
 +
 +pixel_test_buff[1][i]   = PIXEL_MIN;
 +short_test_buff[1][i]   = SMIN;
 +short_test_buff1[1][i]  = PIXEL_MIN;
 +int_test_buff[1][i] = SHORT_MIN;
 +
 +pixel_test_buff[2][i]   = PIXEL_MAX;
 +short_test_buff[2][i]   = SMAX;
 +short_test_buff1[2][i]  = PIXEL_MAX;
 +int_test_buff[2][i] = SHORT_MAX;
 +}
 +
 +
  for (int i = 0; i  bufsize; i++)
  {
  pbuf1[i] = rand()  PIXEL_MAX;
  pbuf2[i] = rand()  PIXEL_MAX;
  pbuf3[i] = rand()  PIXEL_MAX;
  pbuf4[i] = rand()  PIXEL_MAX;
 -
 -#define SMAX (1  12)
  sbuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN,
 min(rand(), SMAX));
  sbuf2[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN,
 min(rand(), SMAX));
  ibuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1;
 @@ -83,6 +128,17 @@
  X265_FREE(sbuf1);
  X265_FREE(sbuf2);
  X265_FREE(sbuf3);
 +for (int i = 0; i  TEST_CASES; i++)
 +{
 +X265_FREE(pixel_test_buff[i]);
 +X265_FREE(short_test_buff[i]);
 +X265_FREE(short_test_buff1[i]);
 +X265_FREE(int_test_buff[i]);
 +}
 +X265_FREE(pixel_test_buff);
 +X265_FREE(short_test_buff);
 +X265_FREE(short_test_buff1);
 +X265_FREE(int_test_buff);
  }


Re: [x265] [PATCH] slicetype: bug fix for cuTree, use type int32_t for listamount and propagate_amount to calculate valid propagate_cost

2014-02-06 Thread Deepthi Nandakumar
On Fri, Feb 7, 2014 at 5:48 AM, g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy
 # Date 1391732264 28800
 #  Thu Feb 06 16:17:44 2014 -0800
 # Node ID 0198815523c1e653fee59f8b6ee58bffbfb12131
 # Parent  634bc0b1c24653dd254df77cd80f96f81e71e888
 slicetype: bug fix for cuTree, use type int32_t for listamount and
 propagate_amount to calculate valid propagate_cost

 diff -r 634bc0b1c246 -r 0198815523c1 source/encoder/slicetype.cpp
 --- a/source/encoder/slicetype.cpp  Wed Feb 05 23:10:22 2014 -0600
 +++ b/source/encoder/slicetype.cpp  Thu Feb 06 16:17:44 2014 -0800
 @@ -824,10 +824,10 @@
  void Lookahead::estimateCUPropagate(Lowres **frames, double
 averageDuration, int p0, int p1, int b, int referenced)
  {
  uint16_t *refCosts[2] = { frames[p0]-propagateCost,
 frames[p1]-propagateCost };
 -int distScaleFactor = (((b - p0)  8) + ((p1 - p0)  1)) / (p1 -
 p0);
 -int bipredWeight = cfg-param.bEnableWeightedBiPred ? 64 -
 (distScaleFactor  2) : 32;
 +int32_t distScaleFactor = (((b - p0)  8) + ((p1 - p0)  1)) / (p1
 - p0);
 +int32_t bipredWeight = cfg-param.bEnableWeightedBiPred ? 64 -
 (distScaleFactor  2) : 32;
  MV *mvs[2] = { frames[b]-lowresMvs[0][b - p0 - 1],
 frames[b]-lowresMvs[1][p1 - b - 1] };
 -int bipredWeights[2] = { bipredWeight, 64 - bipredWeight };
 +int32_t bipredWeights[2] = { bipredWeight, 64 - bipredWeight };

  memset(scratch, 0, widthInCU * sizeof(int));

 @@ -840,8 +840,8 @@
  if (!referenced)
  memset(frames[b]-propagateCost, 0, widthInCU * sizeof(uint16_t));

 -uint16_t StrideInCU = (uint16_t)widthInCU;
 -for (uint16_t blocky = 0; blocky  heightInCU; blocky++)
 +int32_t StrideInCU = widthInCU;
 +for (int32_t blocky = 0; blocky  heightInCU; blocky++)


Why have these unsigned loop indices been changed to signed? The rest looks
valid.

  {
  int cuIndex = blocky * StrideInCU;
  /* TODO This function go into ASM */
 @@ -851,24 +851,24 @@

  if (referenced)
  propagateCost += widthInCU;
 -for (uint16_t blockx = 0; blockx  widthInCU; blockx++, cuIndex++)
 +for (int32_t blockx = 0; blockx  widthInCU; blockx++, cuIndex++)
  {
 -int propagate_amount = scratch[blockx];
 +int32_t propagate_amount = scratch[blockx];
  /* Don't propagate for an intra block. */
  if (propagate_amount  0)
  {
  /* Access width-2 bitfield. */
 -int lists_used = frames[b]-lowresCosts[b - p0][p1 -
 b][cuIndex]  LOWRES_COST_SHIFT;
 +int32_t lists_used = frames[b]-lowresCosts[b - p0][p1 -
 b][cuIndex]  LOWRES_COST_SHIFT;
  /* Follow the MVs to the previous frame(s). */
 -for (uint16_t list = 0; list  2; list++)
 +for (int32_t list = 0; list  2; list++)
  {
  if ((lists_used  list)  1)
  {
 -#define CLIP_ADD(s, x) (s) = X265_MIN((s) + (x), (1  16) - 1)
 -uint16_t listamount = (uint16_t)propagate_amount;
 +#define CLIP_ADD(s, x) (s) = (uint16_t) X265_MIN((s) + (x), (1  16) - 1)
 +int32_t listamount = propagate_amount;

 /* Apply bipred weighting. */
  if (lists_used == 3)
 -listamount = (uint16_t)(listamount *
 bipredWeights[list] + 32)  6;
 +listamount = (listamount *
 bipredWeights[list] + 32)  6;

  /* Early termination for simple case of mv0. */
  if (!mvs[list][cuIndex].word)
 @@ -877,20 +877,20 @@
  continue;
  }

 -uint16_t x = mvs[list][cuIndex].x;
 -uint16_t y = mvs[list][cuIndex].y;

-int cux = (x  5) + blockx;
 -int cuy = (y  5) + blocky;
 -int idx0 = cux + cuy * StrideInCU;
 -int idx1 = idx0 + 1;
 -int idx2 = idx0 + StrideInCU;
 -int idx3 = idx0 + StrideInCU + 1;
 +int32_t x = mvs[list][cuIndex].x;
 +int32_t y = mvs[list][cuIndex].y;
 +int32_t cux = (x  5) + blockx;
 +int32_t cuy = (y  5) + blocky;
 +int32_t idx0 = cux + cuy * StrideInCU;
 +int32_t idx1 = idx0 + 1;
 +int32_t idx2 = idx0 + StrideInCU;
 +int32_t idx3 = idx0 + StrideInCU + 1;
  x = 31;
  y = 31;
 -uint16_t idx0weight = (uint16_t)(32 - y) * (32 -
 x);
 -uint16_t idx1weight = (uint16_t)(32 - y) * x;
 -uint16_t idx2weight = (uint16_t)y * 

Re: [x265] I would like to contribute to x265

2014-02-10 Thread Deepthi Nandakumar
Hi,

Can you take a look at what the following does? Does the decoder actually
detect HRD parameters?

# HG changeset patch
# User Deepthi Nandakumar deep...@multicorewareinc.com
# Date 1387524067 -19800
# Node ID 3e794e059f7ffe0edaaf5432df5297631a0f44f6
# Parent  8133378e225020dbdd747d42a021588bef679ec3
Enable VUI

diff -r 8133378e2250 -r 3e794e059f7f source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppThu Dec 19 17:47:16 2013 +0530
+++ b/source/encoder/encoder.cppFri Dec 20 12:51:07 2013 +0530
@@ -1367,13 +1367,13 @@
 m_bUseASR = false; // adapt search range based on temporal distances
 m_recoveryPointSEIEnabled = 0;
 m_bufferingPeriodSEIEnabled = 0;
-m_pictureTimingSEIEnabled = 0;
+m_pictureTimingSEIEnabled = 1;
 m_displayOrientationSEIAngle = 0;
 m_gradualDecodingRefreshInfoEnabled = 0;
 m_decodingUnitInfoSEIEnabled = 0;
 m_useScalingListId = 0;
 m_activeParameterSetsSEIEnabled = 0;
-m_vuiParametersPresentFlag = false;
+m_vuiParametersPresentFlag = true;
 m_minSpatialSegmentationIdc = 0;
 m_aspectRatioIdc = 0;
 m_sarWidth = 0;
diff -r 8133378e2250 -r 3e794e059f7f source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cppThu Dec 19 17:47:16 2013 +0530
+++ b/source/encoder/frameencoder.cppFri Dec 20 12:51:07 2013 +0530
@@ -136,7 +136,7 @@
 m_sps.setNumLongTermRefPicSPS(0);
 if (m_cfg-getPictureTimingSEIEnabled() ||
m_cfg-getDecodingUnitInfoSEIEnabled())
 {
-m_sps.setHrdParameters(m_cfg-param.frameRate, 0,
m_cfg-param.rc.bitrate, m_cfg-param.bframes  0);
+m_sps.setHrdParameters(m_cfg-param.frameRate, 1,
m_cfg-param.rc.bitrate, m_cfg-param.bframes  0);
 }
 if (m_cfg-getBufferingPeriodSEIEnabled() ||
m_cfg-getPictureTimingSEIEnabled() ||
m_cfg-getDecodingUnitInfoSEIEnabled())
 {



Thanks,
Deepthi


On Tue, Feb 11, 2014 at 7:14 AM, dave dtyx...@gmail.com wrote:

  On 02/10/2014 01:41 PM, Steve Borho wrote:




 On Mon, Feb 10, 2014 at 1:46 PM, dave dtyx...@gmail.com wrote:

   On 02/10/2014 10:41 AM, Steve Borho wrote:




 On Thu, Jan 30, 2014 at 12:31 PM, Steve Borho st...@borho.org wrote:




  On Wed, Jan 29, 2014 at 5:13 PM, dave dtyx...@gmail.com wrote:

 Hi All,

 I would like to offer my services and contribute to x265 development.
  From the wiki it looks like there are plenty things to do but I don't want
 to duplicate or interfere with the work of anyone else so if someone can
 give me something to do I would appreciate it.  I am open to anything
 needed by x265, both c/c++ and assembly work though I don't mind being
 given something simple just to get started.  You can find me in the x265
 irc channel as dtyx265.


  Hi Dave.

  I've been collecting the more pressing TODO items in the bitbucket
 repository's issue tracker:
 https://bitbucket.org/multicoreware/x265/issues?status=new&status=open

  #21 (enabling the VUI message) is the most pressing of the simple
 problems.  That would be a great place to start.


  Hi Dave,

  How are things going on this front?

  --
 Steve Borho


   ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

  I studied the VUI in the h265 spec, appendix E and have been studying
 the x265 code from your suggested starting point,
 setVuiParametersPresentFlag().  It looks like most fields are set to spec
 defaults.  Some look like values that can be options specified by the user,
 others look like values that are calculated from encoding a video.

 Can you tell me more about just what pts and dts are?  I understand
 generally what they are but it seems like there are a few places in the VUI
 where they might play a role in calculating values.  I haven't had a chance
 to compare against the x264 code yet, so if it all becomes obvious there then I
 will get it.


  pts is the presentation time stamp of a frame, the point at which it is
 supposed to be displayed by the decoder.

  dts is the decode time stamp of a frame, the point when the decoder is
 supposed to begin decoding it.

  Both are usually specified in units of the frame rate.  Since the pts and
 dts are frame parameters and the VUI is a stream parameter, I don't think they
 are directly related, except that the denominator is likely signaled in
 some way.


  I tried to create a user account on bitbucket so I could have issue 21
 assigned to me but I keep getting


  BB might not allow issues to be assigned to users who don't have push
 access anyway, so don't be too concerned about this.  You can add a comment
 to the issue stating you are working on it.  Patches should go through this
 mailing list anyway.

  --
 Steve


 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

  I think the denominator that you are looking for is already set in class
 TimingInfo.  vui_num_units_in_tick(confusingly named, if I

Re: [x265] [Bug]reconstruction yuv picture diff with HM decoder out

2014-02-17 Thread Deepthi Nandakumar
Hi,


On Mon, Feb 17, 2014 at 12:38 PM, z...@rock-chips.com 
z...@rock-chips.com wrote:

  hi, x265 members
 1. [Bug report]
 We found that the x265 (version 0.7+2-4b8901ae94ece1ac) reconstructed YUV data differs
 from the HM decoder output when configured in CQP mode with QP=34 and rd set to 0, 1 or 2. I
 think it's a serious bug!

 with command like this


 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o 
 E:\out1.bin -r E:\rec1.yuv --psnr --rd 0


 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o 
 E:\out1.bin -r E:\rec1.yuv --psnr --rd 1

 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o 
 E:\out1.bin -r E:\rec1.yuv --psnr --rd 2


rd 0 and 1 are not finalised yet. We are investigating the hash mismatch at
rd 2 with our own clips; it would be good if you could share the source.


 2. [Proposal]
  The x265 codec encodes video content using a fixed quantization step,
 thus leading to a variable-bitrate stream which may not be suitable for the
 many multimedia applications where a constant bandwidth is required.
 Therefore, an adaptive quantization step may be better.


Adaptive Quantization is already implemented. You may use --aq-mode 1
(enabled by default in the medium preset).


 I'm looking forward to you
 thks
  z...@rock-chips.com

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] asm: added 16bpp support for dct[4x4, 8x8], idct4x4, dst4x4 and idst4x4 primitives

2014-02-18 Thread Deepthi Nandakumar
Has this been fixed? Murugan - have you reproduced/fixed this issue?


On Sat, Feb 15, 2014 at 12:13 AM, Steve Borho st...@borho.org wrote:




 On Fri, Feb 14, 2014 at 12:39 PM, Steve Borho st...@borho.org wrote:




 On Fri, Feb 14, 2014 at 4:41 AM, dnyanesh...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Dnyaneshwar G dnyanesh...@multicorewareinc.com
 # Date 1392374441 -19800
 #  Fri Feb 14 16:10:41 2014 +0530
 # Node ID 831536babdc08f1553a10754bf2a4f4af6aa1695
 # Parent  ed310b17ff6681f191c85341cf6efe7a50770143
 asm: added 16bpp support for dct[4x4, 8x8], idct4x4, dst4x4 and idst4x4
 primitives


 with this patch applied, if I fixup the elif problems, I get occasional
 dequant test failures on 8bpp mac.

 steve@zeppelin ./test/TestBench

 Using random seed 52FE6216 8bpp

 Testing primitives: SSE2

 Testing primitives: SSE3

 Testing primitives: SSSE3

 Testing primitives: SSE4

 dequant: Failed!


 Sorry, the dequant test failures appear to be caused by Murugan's
 testbench changes.  I'm dequeuing those as well until we understand why the
 test is failing.

 --
 Steve Borho

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [Bug]reconstruction yuv picture diff with HM decoder out

2014-02-19 Thread Deepthi Nandakumar
Hi,

We've checked for hash mismatches on all our hash clips with --rd 2. Can
you share the source that caused a mismatch? That will help us identify the
issue.

Thanks,
Deepthi


On Mon, Feb 17, 2014 at 12:38 PM, z...@rock-chips.com 
z...@rock-chips.com wrote:

  hi, x265 members
 1. [Bug report]
 We found that the x265 (version 0.7+2-4b8901ae94ece1ac) reconstructed YUV data differs
 from the HM decoder output when configured in CQP mode with QP=34 and rd set to 0, 1 or 2. I
 think it's a serious bug!

 with command like this


 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o 
 E:\out1.bin -r E:\rec1.yuv --psnr --rd 0


 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o 
 E:\out1.bin -r E:\rec1.yuv --psnr --rd 1

 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o 
 E:\out1.bin -r E:\rec1.yuv --psnr --rd 2


 2. [Proposal]
  The x265 codec encodes video content using a fixed quantization step,
 thus leading to a variable-bitrate stream which may not be suitable for the
 many multimedia applications where a constant bandwidth is required.
 Therefore, an adaptive quantization step may be better.


 I'm looking forward to you
 thks
  z...@rock-chips.com

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] encoder: enable VUI; set HRD parameters in SPS

2014-02-20 Thread Deepthi Nandakumar
This patch has been superseded by the one that enables it from the CLI. I
removed this one from patch list on patchworks.


On Fri, Feb 21, 2014 at 1:14 AM, Steve Borho st...@borho.org wrote:




 On Thu, Feb 20, 2014 at 12:17 PM, dave dtyx...@gmail.com wrote:

 # HG changeset patch
 # User Deepthi Nandakumar deep...@multicorewareinc.com
 # Date 1392883371 -19800
 # Node ID 3934859d310bcc3f54ad1855dd94bd71eb0e7457
 # Parent  3389061b75a486e004409ab628c46fed39d03b72
 encoder: enable VUI; set HRD parameters in SPS.

 You can now add a VUI on the cli.  Use --vui to get a VUI with all
 default values or any vui specific options including --nal-hrd to generate
 a VUI with an HRD though currently the HRD will only have all default
 values.


 agreed, we should try to follow x264's CLI and defaults as much as
 possible for new features that come online.

 --
 Steve Borho

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Add x265 API defination to api.cpp

2014-02-25 Thread Deepthi Nandakumar
Ok, looks good except x265_ssim should not be extern, and should be a part
of common.cpp and not api.cpp.


On Tue, Feb 25, 2014 at 2:33 PM, sa...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Sagar Kotecha sa...@multicorewareinc.com
 # Date 1393318766 -19800
 #  Tue Feb 25 14:29:26 2014 +0530
 # Node ID 504c2a959e5815cb3020033289137f64cb458aee
 # Parent  a36a669d09e89332dd91817afdf139853ba3ad03
 Add x265 API defination to api.cpp

 diff -r a36a669d09e8 -r 504c2a959e58 source/common/common.cpp
 --- a/source/common/common.cpp  Tue Feb 25 02:22:06 2014 -0600
 +++ b/source/common/common.cpp  Tue Feb 25 14:29:26 2014 +0530
 @@ -134,23 +134,3 @@
  va_end(arg);
  }

 -extern C
 -x265_picture *x265_picture_alloc()
 -{
 -return (x265_picture*)x265_malloc(sizeof(x265_picture));
 -}
 -
 -extern C
 -void x265_picture_init(x265_param *param, x265_picture *pic)
 -{
 -memset(pic, 0, sizeof(x265_picture));
 -
 -pic-bitDepth = param-internalBitDepth;
 -pic-colorSpace = param-internalCsp;
 -}
 -
 -extern C
 -void x265_picture_free(x265_picture *p)
 -{
 -return x265_free(p);
 -}
 diff -r a36a669d09e8 -r 504c2a959e58 source/encoder/CMakeLists.txt
 --- a/source/encoder/CMakeLists.txt Tue Feb 25 02:22:06 2014 -0600
 +++ b/source/encoder/CMakeLists.txt Tue Feb 25 14:29:26 2014 +0530
 @@ -58,4 +58,5 @@
  compress.cpp
  reference.cpp reference.h
  encoder.cpp encoder.h
 +   api.cpp
  weightPrediction.cpp)
 diff -r a36a669d09e8 -r 504c2a959e58 source/encoder/api.cpp
 --- /dev/null   Thu Jan 01 00:00:00 1970 +
 +++ b/source/encoder/api.cppTue Feb 25 14:29:26 2014 +0530
 @@ -0,0 +1,199 @@

 +/*
 + * Copyright (C) 2013 x265 project
 + *
 + * Authors: Steve Borho st...@borho.org
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with this program; if not, write to the Free Software
 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
 USA.
 + *
 + * This program is also available under a commercial proprietary license.
 + * For more information, contact us at licens...@multicorewareinc.com.
 +
 */
 +
 +#include TLibCommon/CommonDef.h
 +#include param.h
 +#include encoder.h
 +#include frameencoder.h
 +
 +using namespace x265;
 +
 +extern C
 +x265_encoder *x265_encoder_open(x265_param *param)
 +{
 +x265_setup_primitives(param, -1);  // -1 means auto-detect if
 uninitialized
 +
 +if (x265_check_params(param))
 +return NULL;
 +
 +if (x265_set_globals(param))
 +return NULL;
 +
 +Encoder *encoder = new Encoder;
 +if (encoder)
 +{
 +// these may change params for auto-detect, etc
 +encoder-determineLevelAndProfile(param);
 +encoder-configure(param);
 +
 +// save a copy of final parameters in TEncCfg
 +memcpy(encoder-param, param, sizeof(*param));
 +
 +x265_print_params(param);
 +encoder-create();
 +encoder-init();
 +}
 +
 +return encoder;
 +}
 +
 +extern C
 +int x265_encoder_headers(x265_encoder *enc, x265_nal **pp_nal, uint32_t
 *pi_nal)
 +{
 +if (!pp_nal || !enc)
 +return 0;
 +
 +Encoder *encoder = static_castEncoder*(enc);
 +
 +int ret = 0;
 +NALUnitEBSP *nalunits[MAX_NAL_UNITS] = { 0, 0, 0, 0, 0 };
 +if (!encoder-getStreamHeaders(nalunits))
 +{
 +int nalcount = encoder-extractNalData(nalunits);
 +*pp_nal = encoder-m_nals[0];
 +if (pi_nal) *pi_nal = nalcount;
 +}
 +else if (pi_nal)
 +{
 +*pi_nal = 0;
 +ret = -1;
 +}
 +
 +for (int i = 0; i  MAX_NAL_UNITS; i++)
 +{
 +if (nalunits[i])
 +{
 +free(nalunits[i]-m_nalUnitData);
 +X265_FREE(nalunits[i]);
 +}
 +}
 +
 +return ret;
 +}
 +
 +extern C
 +int x265_encoder_encode(x265_encoder *enc, x265_nal **pp_nal, uint32_t
 *pi_nal, x265_picture *pic_in, x265_picture *pic_out)
 +{
 +if (!enc)
 +return -1;
 +
 +Encoder *encoder = static_castEncoder*(enc);
 +NALUnitEBSP *nalunits[MAX_NAL_UNITS] = { 0, 0, 0, 0, 0 };
 +int numEncoded = encoder-encode(!pic_in, pic_in, pic_out, nalunits);
 +
 +if (pp_nal  numEncoded  0)
 +{
 +int nalcount = encoder-extractNalData(nalunits);
 +*pp_nal = encoder-m_nals[0];
 

Re: [x265] [PATCH] all_angs_pred_32x32, asm code improvement

2014-02-27 Thread Deepthi Nandakumar
The earlier patch was pushed, Praveen. Can you send a new patch which just
removes the unused statements?
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2/2] Fix x86_64 build

2014-03-05 Thread Deepthi Nandakumar
Can you convert this to an hg patch? Git patches don't apply cleanly.


On Wed, Mar 5, 2014 at 4:08 PM, Rafaël Carré fun...@videolan.org wrote:

 ---
  source/CMakeLists.txt | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
 index 2febfaa..d2fea1a 100644
 --- a/source/CMakeLists.txt
 +++ b/source/CMakeLists.txt
 @@ -21,7 +21,7 @@ configure_file(${PROJECT_SOURCE_DIR}/x265_config.h.in

  SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake
 ${CMAKE_MODULE_PATH})

 -if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR}
 STREQUAL x86)
 +if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR}
 STREQUAL x86 OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64)
  set(X86 1)
  add_definitions(-DX265_ARCH_X86=1)
  if(${CMAKE_SIZEOF_VOID_P} MATCHES 8)
 --
 1.9.0

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] fix chroma lambda weighting

2014-03-25 Thread Deepthi Nandakumar
Thanks, very interesting fix. Can you give us some background here? Where
did the divide by 3.0 come from?


On Tue, Mar 25, 2014 at 9:01 AM, Satoshi Nakagawa nakagawa...@oki.com wrote:

 # HG changeset patch
 # User Satoshi Nakagawa nakagawa...@oki.com
 # Date 1395672158 -32400
 #  Mon Mar 24 23:42:38 2014 +0900
 # Node ID 08584b5913bce6a5f9d2f0d408fcdace6aa83a65
 # Parent  fdd7c6168cf42a11240ff1c7fc7b401605524db2
 fix chroma lambda weighting

 diff -r fdd7c6168cf4 -r 08584b5913bc source/encoder/frameencoder.cpp
 --- a/source/encoder/frameencoder.cpp   Fri Mar 21 14:44:35 2014 -0500
 +++ b/source/encoder/frameencoder.cpp   Mon Mar 24 23:42:38 2014 +0900
 @@ -335,11 +335,10 @@
  // instead we weight the distortion of chroma.
  int chromaQPOffset = slice-getPPS()-getChromaCbQpOffset() +
 slice-getSliceQpDeltaCb();
  int qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
 -double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc])); //
 takes into account of the chroma qp mapping and chroma qp Offset
 -
 +double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0);
 // takes into account of the chroma qp mapping and chroma qp Offset
  chromaQPOffset = slice-getPPS()-getChromaCrQpOffset() +
 slice-getSliceQpDeltaCr();
  qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
 -double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc])); //
 takes into account of the chroma qp mapping and chroma qp Offset
 +double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0);
 // takes into account of the chroma qp mapping and chroma qp Offset
  double chromaLambda = lambda / crWeight;

  m_rows[row].m_search.setQPLambda(qp, lambda, chromaLambda);
 @@ -376,10 +375,10 @@
  int qpc;
  int chromaQPOffset = slice-getPPS()-getChromaCbQpOffset() +
 slice-getSliceQpDeltaCb();
  qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
 -double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc])); //
 takes into account of the chroma qp mapping and chroma qp Offset
 +double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0);
 // takes into account of the chroma qp mapping and chroma qp Offset
  chromaQPOffset = slice-getPPS()-getChromaCrQpOffset() +
 slice-getSliceQpDeltaCr();
  qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
 -double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc])); //
 takes into account of the chroma qp mapping and chroma qp Offset
 +double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0);
 // takes into account of the chroma qp mapping and chroma qp Offset
  double chromaLambda = lambda / crWeight;

  // NOTE: set SAO lambda every Frame
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] fix: calcrecon asm code for HIGH_BIT_DEPTH

2014-04-02 Thread Deepthi Nandakumar
Hi,

The dependent patch on calcRecon has been backed out. This does not apply
anymore.

Deepthi


On Wed, Apr 2, 2014 at 12:45 PM, Satoshi Nakagawa nakagawa...@oki.com wrote:

 # HG changeset patch
 # User Satoshi Nakagawa nakagawa...@oki.com
 # Date 1396422746 -32400
 #  Wed Apr 02 16:12:26 2014 +0900
 # Node ID a7e96e9909068758588832e47637c7f8e30e5228
 # Parent  03bad90e94adce6fb89c6d5edb86919a1e783402
 fix: calcrecon asm code for HIGH_BIT_DEPTH

 diff -r 03bad90e94ad -r a7e96e990906 source/common/x86/pixel-util8.asm
 --- a/source/common/x86/pixel-util8.asm Wed Apr 02 06:51:35 2014 +0530
 +++ b/source/common/x86/pixel-util8.asm Wed Apr 02 16:12:26 2014 +0900
 @@ -91,16 +91,16 @@

  ; store recipred[]
  movh[r3], m0
 -movhps  [r3 + r5], m0
 +movhps  [r3 + r6], m0

  ; store recqt[]
  movh[r2], m0
 -movhps  [r2 + r6], m0
 +movhps  [r2 + r5], m0

  lea r0, [r0 + r4 * 2]
  lea r1, [r1 + r4 * 2]
 -lea r2, [r2 + r6 * 2]
 -lea r3, [r3 + r5 * 2]
 +lea r2, [r2 + r5 * 2]
 +lea r3, [r3 + r6 * 2]

  dec t7b
  jnz.loop
 @@ -291,7 +291,7 @@
  ; store recqt[]
  movu[r2], m0
  movu[r2 + 16], m1
 -add r2, r6
 +add r2, r5

  lea r0, [r0 + r4 * 2]
  lea r1, [r1 + r4 * 2]
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] cli: set aq strength to 0 in CQP

2014-04-02 Thread Deepthi Nandakumar
On Wed, Apr 2, 2014 at 5:20 PM, Aarthi Priya Thirumalai 
aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1396439378 -19800
 #  Wed Apr 02 17:19:38 2014 +0530
 # Node ID 32ba982c5279dce5abd718453b3c1a6affb51ce5
 # Parent  03bad90e94adce6fb89c6d5edb86919a1e783402
 param: set aq strength to 0 in CQP

 diff -r 03bad90e94ad -r 32ba982c5279 source/encoder/encoder.cpp
 --- a/source/encoder/encoder.cpp Wed Apr 02 06:51:35 2014 +0530
 +++ b/source/encoder/encoder.cpp Wed Apr 02 17:19:38 2014 +0530
 @@ -1415,6 +1415,7 @@
  p-rc.aqMode = X265_AQ_NONE;
  p-rc.bitrate = 0;
  p-rc.cuTree = 0;
 +p-rc.aqStrength = 0;
  }

  if (p-rc.aqMode == 0  p-rc.cuTree)
 @@ -1434,6 +1435,11 @@
  p-rc.aqMode = X265_AQ_NONE;
  }

 +if (p-rc.aqMode = X265_AQ_NONE  p-rc.cuTree == 0)


typo above (==), please send a new patch.

 +{
 +p-rc.aqStrength = 0;
 +}
 +
  if (p-internalCsp != X265_CSP_I420)
  {
  x265_log(p, X265_LOG_WARNING, !! HEVC Range Extension
 specifications are not finalized !!\n);



 On Wed, Apr 2, 2014 at 4:46 PM, Tim Walker tdskywal...@gmail.com wrote:

 On 02 Apr 2014, at 08:11, aar...@multicorewareinc.com wrote:

  # HG changeset patch
  # User Aarthi Thirumalai
  # Date 1396419089 -19800
  #  Wed Apr 02 11:41:29 2014 +0530
  # Node ID 4d614af01c0c101b7862cd9aa5675457b14af1d4
  # Parent  03bad90e94adce6fb89c6d5edb86919a1e783402
  cli: set aq strength to 0 in CQP

 The commit message is wrong. This seems to affect libx265, not the
 command-line interface.

 Tim
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] cli: set aq strength to 0 in CQP

2014-04-02 Thread Deepthi Nandakumar
Never mind, fixed.


On Wed, Apr 2, 2014 at 5:37 PM, Deepthi Nandakumar 
deep...@multicorewareinc.com wrote:




 On Wed, Apr 2, 2014 at 5:20 PM, Aarthi Priya Thirumalai 
 aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1396439378 -19800
 #  Wed Apr 02 17:19:38 2014 +0530
 # Node ID 32ba982c5279dce5abd718453b3c1a6affb51ce5
 # Parent  03bad90e94adce6fb89c6d5edb86919a1e783402
 param: set aq strength to 0 in CQP

 diff -r 03bad90e94ad -r 32ba982c5279 source/encoder/encoder.cpp
 --- a/source/encoder/encoder.cpp Wed Apr 02 06:51:35 2014 +0530
 +++ b/source/encoder/encoder.cpp Wed Apr 02 17:19:38 2014 +0530
 @@ -1415,6 +1415,7 @@
  p-rc.aqMode = X265_AQ_NONE;
  p-rc.bitrate = 0;
  p-rc.cuTree = 0;
 +p-rc.aqStrength = 0;
  }

  if (p-rc.aqMode == 0  p-rc.cuTree)
 @@ -1434,6 +1435,11 @@
  p-rc.aqMode = X265_AQ_NONE;
  }

 +if (p-rc.aqMode = X265_AQ_NONE  p-rc.cuTree == 0)


 typo above (==), please send a new patch.

 +{
 +p-rc.aqStrength = 0;
 +}
 +
  if (p-internalCsp != X265_CSP_I420)
  {
  x265_log(p, X265_LOG_WARNING, !! HEVC Range Extension
 specifications are not finalized !!\n);



 On Wed, Apr 2, 2014 at 4:46 PM, Tim Walker tdskywal...@gmail.com wrote:

 On 02 Apr 2014, at 08:11, aar...@multicorewareinc.com wrote:

  # HG changeset patch
  # User Aarthi Thirumalai
  # Date 1396419089 -19800
  #  Wed Apr 02 11:41:29 2014 +0530
  # Node ID 4d614af01c0c101b7862cd9aa5675457b14af1d4
  # Parent  03bad90e94adce6fb89c6d5edb86919a1e783402
  cli: set aq strength to 0 in CQP

 The commit message is wrong. This seems to affect libx265, not the
 command-line interface.

 Tim
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] vbv: log frame-average QPs for VBV; even when AQ is disabled

2014-04-11 Thread Deepthi Nandakumar
Sure.


On Fri, Apr 11, 2014 at 11:41 AM, Aarthi Priya Thirumalai 
aar...@multicorewareinc.com wrote:

 Hello,
   Reminder: this bug fix is still pending. Can we have this patch pushed
 in to fix the bug, if there are no problems with it?


 On Tue, Apr 8, 2014 at 8:33 PM, aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1396969086 -19800
 #  Tue Apr 08 20:28:06 2014 +0530
 # Node ID cac0dcd5a5c2470194d58057d9decd38da3e4405
 # Parent  b5caca9954f36fc8e1cfb9e25f96288bf3aa18e2
 vbv: log frame-average QPs for VBV; even when AQ is disabled.

 diff -r b5caca9954f3 -r cac0dcd5a5c2 source/encoder/ratecontrol.cpp
 --- a/source/encoder/ratecontrol.cppTue Apr 08 16:13:11 2014 +0530
 +++ b/source/encoder/ratecontrol.cppTue Apr 08 20:28:06 2014 +0530
 @@ -1027,6 +1027,8 @@

  pic-m_avgQpRc /= (pic-getFrameHeightInCU() *
 pic-getFrameWidthInCU());
  rce-qpaRc = pic-m_avgQpRc;
 +// copy avg RC qp to m_avgQpAq. To print out the
 correct qp when aq/cutree is disabled.
 +pic-m_avgQpAq = pic-m_avgQpRc;
  }

  if (pic-m_qpaAq)



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] fix: constrained intra

2014-04-15 Thread Deepthi Nandakumar
Thanks, verified and pushed. Do you mind adding some more information on
this option to the docs? This only affects intra decisions in inter-slices,
I think?

Deepthi


On Tue, Apr 15, 2014 at 10:16 AM, Satoshi Nakagawa nakagawa...@oki.com wrote:

  Was it generating hash mistakes without these fixes?

 Yes, when encoding with the --constrained-intra option.

  -Original Message-
  From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
  Steve Borho
  Sent: Tuesday, April 15, 2014 1:29 PM
  To: Development for x265
  Subject: Re: [x265] fix: constrained intra
 
  On Mon, Apr 14, 2014 at 10:36 PM, Satoshi Nakagawa nakagawa...@oki.com
  wrote:
   # HG changeset patch
   # User Satoshi Nakagawa nakagawa...@oki.com # Date 1397532876 -32400
   #  Tue Apr 15 12:34:36 2014 +0900
   # Node ID 5dde9f4817813e96116df5b86925fa3fc5eff2a8
   # Parent  08d64a70594ed31cd80046bd4a7e9fa52119be47
   fix: constrained intra
 
  Was it generating hash mistakes without these fixes?
 
  
   diff -r 08d64a70594e -r 5dde9f481781
  source/Lib/TLibCommon/TComPattern.cpp
   --- a/source/Lib/TLibCommon/TComPattern.cpp Mon Apr 14 13:18:18
  2014 -0500
   +++ b/source/Lib/TLibCommon/TComPattern.cpp Tue Apr 15 12:34:36
  2014 +0900
   @@ -79,12 +79,24 @@
int  leftUnits   = cuHeightInUnits  1;
partIdxLB=
  g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) *
  partIdxStride)];
  
   -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
   -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
  (bNeighborFlags + leftUnits + 1));
   -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
 partIdxRT,
  (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits - 1));
   -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
   +if (!cu-getSlice()-getPPS()-getConstrainedIntraPred())
   +{
   +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu,
  partIdxLT);
   +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
  (bNeighborFlags + leftUnits + 1));
   +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
  partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits - 1));
   +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT,
  partIdxLB, (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
   +}
   +else
   +{
   +bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu,
  partIdxLT);
   +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   +numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT,
  partIdxRT, (bNeighborFlags + leftUnits + 1));
   +numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT,
  partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   +numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT,
  partIdxLB, (bNeighborFlags + leftUnits - 1));
   +numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT,
  partIdxLB, (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
   +}
  
width = cuWidth2 + 1;
height = cuHeight2 + 1;
   @@ -238,12 +250,24 @@
int  leftUnits   = cuHeightInUnits  1;
partIdxLB=
  g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) *
  partIdxStride)];
  
   -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
   -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
  (bNeighborFlags + leftUnits + 1));
   -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
 partIdxRT,
  (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits - 1));
   -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
   +if (!cu-getSlice()-getPPS()-getConstrainedIntraPred())
   +{
   +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu,
  partIdxLT);
   +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
  (bNeighborFlags + leftUnits + 1));
   +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
  partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits - 1));
   +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT,
  

Re: [x265] [PATCH] vbv:reinit cu data when row vbv re-rencode is triggered

2014-04-18 Thread Deepthi Nandakumar
I see what you're trying to do here. m_baseQP is always zero in normal vbv
operation, but contains previous QP values during re-encode.

A better solution would be to find out which field in TComDataCU actually
contains uncleared values, and clear them in TComDataCU::initCU.



On Sat, Apr 19, 2014 at 1:07 AM, Steve Borho st...@borho.org wrote:

 On Fri, Apr 18, 2014 at 7:31 AM,  santhosh...@multicorewareinc.com
 wrote:
  # HG changeset patch
  # User Santhoshini Sekar santhosh...@multicorewareinc.com
  # Date 1397824258 -19800
  #  Fri Apr 18 18:00:58 2014 +0530
  # Node ID 3d680a36ea9acc45f3db128a5efe557a79a3026b
  # Parent  03aa222ff8eb5ffbe65bd5bf522cad6561210024
  vbv:reinit cu data when row vbv re-rencode is triggered.

 space after colon
 no need for end punctuation on summary line

 
  diff -r 03aa222ff8eb -r 3d680a36ea9a source/encoder/frameencoder.cpp
  --- a/source/encoder/frameencoder.cpp   Fri Apr 18 17:32:14 2014 +0530
  +++ b/source/encoder/frameencoder.cpp   Fri Apr 18 18:00:58 2014 +0530
  @@ -1100,6 +1100,8 @@
   {
   if (!row)
   m_pic-m_rowDiagQp[row] = m_pic-m_avgQpRc;
  +if (cu-m_baseQp != 0)
  +cu-initEstData(0);

 this doesn't look to be the appropriate place for this sort of reset.
 If it needs to happen after a row restart shouldn't it be done in the
 restart loop below?  Here it looks like it could be calling
 initEstData() many times, redundantly.

 
   if (row = col  row  m_vbvResetTriggerRow != row)
   cu-m_baseQp = m_pic-getCU(cuAddr - numCols +
 1)-m_baseQp;
  ___
  x265-devel mailing list
  x265-devel@videolan.org
  https://mailman.videolan.org/listinfo/x265-devel



 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] cutree: adjust the rowSatd with qpoffset only for reference frames

2014-05-15 Thread Deepthi Nandakumar
Thanks a lot, yes, it is a typo.
On May 16, 2014 6:20 AM, den c dnc...@gmail.com wrote:

 On 5/15/14, g...@multicorewareinc.com g...@multicorewareinc.com wrote:
  # HG changeset patch
  # User Gopu Govindaswamy
  # Date 1400146948 -19800
  #  Thu May 15 15:12:28 2014 +0530
  # Node ID 794b7d744a14c653f76748b4ec3033b040c3e783
  # Parent  6ca880d7e68361e5b9e2353f05369d9e5c40a29f
  cutree: adjust the rowSatd with qpoffset only for reference frames
 
  diff -r 6ca880d7e683 -r 794b7d744a14 source/encoder/slicetype.cpp
  --- a/source/encoder/slicetype.cppThu May 15 12:31:06 2014 +0530
  +++ b/source/encoder/slicetype.cppThu May 15 15:12:28 2014 +0530
  @@ -1148,7 +1148,7 @@
   {
   int64_t score = 0;
   int *rowSatd = frames[b]-rowSatds[b - p0][p1 - b];
  -double *qp_offset = IS_X265_TYPE_B(frames[b]-sliceType) ?
  frames[b]-qpAqOffset : frames[b]-qpOffset;
  +double *qp_offset = (frames[b]-sliceType = X265_TYPE_B) ?

  I think this is a typo and what is needed is: (frames[b]->sliceType == X265_TYPE_B).
 Correct me if I am wrong.
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] fix : square chroma transform expected error message

2014-05-19 Thread Deepthi Nandakumar
Can we have more detail on this, Ashok? Both trWidthC, trHeightC and widthC
and heightC should have the same values. Why exactly was this assert being
triggered in the regression tests?


On Mon, May 19, 2014 at 7:19 PM, as...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Ashok Kumar Mishraas...@multicorewareinc.com
 # Date 1400507347 -19800
 #  Mon May 19 19:19:07 2014 +0530
 # Node ID 8647c7861144eee4a0f96687794607b3e98d7b9f
 # Parent  ba2a9f61ea06f0ac799d8c0247eec770065465bb
 fix :  square chroma transform expected error message

 diff -r ba2a9f61ea06 -r 8647c7861144 source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri May 16 19:20:46 2014
 +0900
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon May 19 19:19:07 2014
 +0530
 @@ -2975,7 +2975,7 @@
  else
  {
  int16_t *ptr = resiYuv-getCbAddr(absTUPartIdxC);
 -X265_CHECK(trWidthC == trHeightC, square chroma
 transform expected\n);
 +X265_CHECK(widthC == heightC, square chroma
 transform expected\n);

  primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr,
 resiYuv-m_cwidth, 0);
  }
  if (absSumV)
 @@ -2991,7 +2991,7 @@
  else
  {
  int16_t *ptr =  resiYuv-getCrAddr(absTUPartIdxC);
 -X265_CHECK(trWidthC == trHeightC, square chroma
 transform expected\n);
 +X265_CHECK(widthC == heightC, square chroma
 transform expected\n);

  primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr,
 resiYuv-m_cwidth, 0);
  }
  cu-setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U,
 absTUPartIdxC, tuIterator.m_absPartIdxStep);
 @@ -3348,7 +3348,7 @@
  {
  int16_t *ptr =
 m_qtTempShortYuv[qtlayer].getCbAddr(tuIterator.m_absPartIdxTURelCU);
  const uint32_t stride =
 m_qtTempShortYuv[qtlayer].m_cwidth;
 -X265_CHECK(trWidthC == trHeightC, square chroma
 transform expected\n);
 +X265_CHECK(widthC == heightC, square chroma
 transform expected\n);

  primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, stride, 0);
  }

 @@ -3416,7 +3416,7 @@
  {
  int16_t *ptr =
  m_qtTempShortYuv[qtlayer].getCrAddr(tuIterator.m_absPartIdxTURelCU);
  const uint32_t stride =
 m_qtTempShortYuv[qtlayer].m_cwidth;
 -X265_CHECK(trWidthC == trHeightC, square chroma
 transform expected\n);
 +X265_CHECK(widthC == heightC, square chroma
 transform expected\n);

  primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, stride, 0);
  }

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] CLI: Default rate control mode in help output - CQ or CRF?

2014-05-20 Thread Deepthi Nandakumar
On Tue, May 20, 2014 at 1:30 PM, Mario *LigH* Rohkrämer cont...@ligh.de wrote:

 In the help output of the x265 CLI, I see both default values for CRF
 (28.0) and CQ (32). Are they valid at the same time? Or is the default QP
 outdated since CRF is the default rate control mode?

 Yes, CRF is the default mode. With nothing specified --crf 32 is the
default for ratecontrol.  I'm removing the default QP value to avoid
confusion.

I believe if I explicitly define either a CRF or CQ value, this will set
 the rate control mode to either CRF or CQ depending which comes last in the
 command line?

 Correct.


 --

 Fun and success!
 Mario *LigH* Rohkrämer
 mailto:cont...@ligh.de

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] CLI: Default rate control mode in help output - CQ or CRF?

2014-05-20 Thread Deepthi Nandakumar
Sorry, yes, typo: --crf 28.0 is the default.


On Tue, May 20, 2014 at 3:14 PM, Mario *LigH* Rohkrämer cont...@ligh.de wrote:

 Am 20.05.2014, 11:30 Uhr, schrieb Deepthi Nandakumar 
 deep...@multicorewareinc.com:


  With nothing specified --crf 32 is the default for ratecontrol.


 No, actually --crf 28.0, I believe?


 --

 Fun and success!
 Mario *LigH* Rohkrämer
 mailto:cont...@ligh.de

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] psyrd: use psyrdcost for intra for rdLevels 5

2014-05-22 Thread Deepthi Nandakumar
psy-rd is enabled only in full-rdo decisions (rdLevel = 5). Your patch
calculates intra-psy rd cost for rd=4 and lower, which is where
xEncodeIntraInInter is called.


On Thu, May 22, 2014 at 3:31 PM, sumala...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Sumalatha Polureddysumala...@multicorewareinc.com
 # Date 1400752845 -19800
 # Node ID fc400f71de9dcbaeda5c0669edabd27e288cdf2c
 # Parent  f39484bb3eecc8cfca0448c63f16fe8dacc54d7f
 psyrd: use psyrdcost for intra for rdLevels  5

 diff -r f39484bb3eec -r fc400f71de9d source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Tue May 20 22:02:00 2014 -0500
 +++ b/source/encoder/compress.cpp   Thu May 22 15:30:45 2014 +0530
 @@ -70,7 +70,18 @@
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);

  cu-m_totalBits = m_entropyCoder-getNumberOfWrittenBits();
 -cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion,
 cu-m_totalBits);
 +if (m_rdCost-psyRdEnabled())
 +{
 +int part = g_convertToBit[cu-getCUSize(0)];
 +uint32_t psyRdCost = m_rdCost-psyCost(part,
 m_origYuv[depth]-getLumaAddr(), m_origYuv[depth]-getStride(),
 +m_tmpRecoYuv[depth]-getLumaAddr(),
 m_tmpRecoYuv[depth]-getStride());
 +cu-m_totalCost = m_rdCost-calcPsyRdCost(cu-m_totalDistortion,
 cu-m_totalBits, psyRdCost);
 +
 +}
 +else
 +{
 +cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion,
 cu-m_totalBits);
 +}
  }

  void TEncCu::xComputeCostIntraInInter(TComDataCU* cu, PartSize partSize)
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] psyrd: use psyrdcost for intra for rdLevels 5

2014-05-22 Thread Deepthi Nandakumar
On Thu, May 22, 2014 at 4:23 PM, Sumalatha Polureddy 
sumala...@multicorewareinc.com wrote:

 psy-rd is enabled for full-rdo decision(rd=5) and for rd= 4


This is a bug - psy-rd should be enabled only for rd=5. Fixing now.


 m_rdCost.setPsyRdScale(top-param-rdLevel = 4 ? top-param-psyRd : 0);
 so this patch is required for rd=4 where inter uses psyrdcost and intra
 was using rdcost. For other lower rd levels  4, m_psyRdScale = 0, so
 psycost will not have much effect on RDcost


 On Thu, May 22, 2014 at 3:45 PM, Deepthi Nandakumar 
 deep...@multicorewareinc.com wrote:

 psy-rd is enabled only in full-rdo decisions (rdLevel = 5). Your patch
 calculates intra-psy rd cost for rd=4 and lower, which is where
 xEncodeIntrainInter is called.


 On Thu, May 22, 2014 at 3:31 PM, sumala...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Sumalatha Polureddysumala...@multicorewareinc.com
 # Date 1400752845 -19800
 # Node ID fc400f71de9dcbaeda5c0669edabd27e288cdf2c
 # Parent  f39484bb3eecc8cfca0448c63f16fe8dacc54d7f
 psyrd: use psyrdcost for intra for rdLevels  5

 diff -r f39484bb3eec -r fc400f71de9d source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp   Tue May 20 22:02:00 2014 -0500
 +++ b/source/encoder/compress.cpp   Thu May 22 15:30:45 2014 +0530
 @@ -70,7 +70,18 @@
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);

  cu-m_totalBits = m_entropyCoder-getNumberOfWrittenBits();
 -cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion,
 cu-m_totalBits);
 +if (m_rdCost-psyRdEnabled())
 +{
 +int part = g_convertToBit[cu-getCUSize(0)];
 +uint32_t psyRdCost = m_rdCost-psyCost(part,
 m_origYuv[depth]-getLumaAddr(), m_origYuv[depth]-getStride(),
 +m_tmpRecoYuv[depth]-getLumaAddr(),
 m_tmpRecoYuv[depth]-getStride());
 +cu-m_totalCost =
 m_rdCost-calcPsyRdCost(cu-m_totalDistortion, cu-m_totalBits, psyRdCost);
 +
 +}
 +else
 +{
 +cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion,
 cu-m_totalBits);
 +}
  }

  void TEncCu::xComputeCostIntraInInter(TComDataCU* cu, PartSize partSize)
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH RFC] psyrd: use psyrdcost for selecting best mode across depth

2014-05-26 Thread Deepthi Nandakumar
On Mon, May 26, 2014 at 3:02 PM, sumala...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Sumalatha Polureddysumala...@multicorewareinc.com
 # Date 1401087565 -19800
 # Node ID cbe5cc0e48b4122518ca732a1b32de16ba25c467
 # Parent  5134e76aa729b6fece18701fdc00390c2f2ffb32
 psyrd: use psyrdcost for selecting best mode across depth

 diff -r 5134e76aa729 -r cbe5cc0e48b4 source/Lib/TLibCommon/TComDataCU.cpp
 --- a/source/Lib/TLibCommon/TComDataCU.cpp  Thu May 22 21:46:21 2014
 -0500
 +++ b/source/Lib/TLibCommon/TComDataCU.cpp  Mon May 26 12:29:25 2014
 +0530
 @@ -218,6 +218,7 @@
  m_sa8dCost = MAX_INT64;
  m_totalDistortion  = 0;
  m_totalBits= 0;
 +m_psyCost  = 0;
  m_numPartitions= pic-getNumPartInCU();
  char* qp   = pic-getCU(getAddr())-getQP();
  m_baseQp   = pic-getCU(getAddr())-m_baseQp;
 @@ -309,6 +310,7 @@
  m_sa8dCost = MAX_INT64;
  m_totalDistortion  = 0;
  m_totalBits= 0;
 +m_psyCost  = 0;

  uint8_t cuSize = g_maxCUSize  depth;

 @@ -345,6 +347,7 @@
  m_sa8dCost = MAX_INT64;
  m_totalDistortion  = 0;
  m_totalBits= 0;
 +m_psyCost  = 0;

  uint8_t cuSize = g_maxCUSize  depth;

 @@ -393,6 +396,7 @@
  m_sa8dCost = MAX_INT64;
  m_totalDistortion  = 0;
  m_totalBits= 0;
 +m_psyCost  = 0;
  m_numPartitions= cu-getTotalNumPart()  2;

  for (int i = 0; i  4; i++)
 @@ -457,6 +461,7 @@
  m_sa8dCost = MAX_INT64;
  m_totalDistortion  = 0;
  m_totalBits= 0;
 +m_psyCost  = 0;
  m_numPartitions= cu-getTotalNumPart()  2;

  for (int i = 0; i  4; i++)
 @@ -520,6 +525,7 @@
  m_sa8dCost = MAX_INT64;
  m_totalDistortion  = 0;
  m_totalBits= 0;
 +m_psyCost  = 0;
  m_numPartitions= cu-getTotalNumPart()  2;

  TComDataCU* rpcCU = m_pic-getCU(m_cuAddr);
 @@ -550,6 +556,7 @@

  m_totalDistortion  += cu-m_totalDistortion;
  m_totalBits+= cu-m_totalBits;
 +m_psyCost  += cu-m_psyCost;

  uint32_t offset   = cu-getTotalNumPart() * partUnitIdx;
  uint32_t numPartition = cu-getTotalNumPart();
 @@ -610,6 +617,7 @@
  rpcCU-m_totalCost   = m_totalCost;
  rpcCU-m_totalDistortion = m_totalDistortion;
  rpcCU-m_totalBits   = m_totalBits;
 +rpcCU-m_psyCost = m_psyCost;

  int sizeInBool  = sizeof(bool) * m_numPartitions;
  int sizeInChar  = sizeof(char) * m_numPartitions;
 @@ -695,6 +703,7 @@
  cu-m_totalCost   = m_totalCost;
  cu-m_totalDistortion = m_totalDistortion;
  cu-m_totalBits   = m_totalBits;
 +cu-m_psyCost = m_psyCost;

  int sizeInBool  = sizeof(bool) * qNumPart;
  int sizeInChar  = sizeof(char) * qNumPart;
 diff -r 5134e76aa729 -r cbe5cc0e48b4 source/Lib/TLibCommon/TComDataCU.h
 --- a/source/Lib/TLibCommon/TComDataCU.hThu May 22 21:46:21 2014
 -0500
 +++ b/source/Lib/TLibCommon/TComDataCU.hMon May 26 12:29:25 2014
 +0530
 @@ -168,6 +168,7 @@
  uint64_t  m_totalCost;   /// sum of partition RD costs
  uint32_t  m_totalDistortion; /// sum of partition distortion
  uint32_t  m_totalBits;   /// sum of partition signal bits
 +uint32_t  m_psyCost;
  uint64_t  m_avgCost[4];  // stores the avg cost of CU's in
 frame for each depth
  uint32_t  m_count[4];
  uint64_t  m_sa8dCost;
 diff -r 5134e76aa729 -r cbe5cc0e48b4 source/Lib/TLibEncoder/TEncCu.cpp
 --- a/source/Lib/TLibEncoder/TEncCu.cpp Thu May 22 21:46:21 2014 -0500
 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Mon May 26 12:29:25 2014 +0530
 @@ -601,7 +601,14 @@
  m_entropyCoder-resetBits();
  m_entropyCoder-encodeSplitFlag(outBestCU, 0, depth);
  outBestCU-m_totalBits +=
 m_entropyCoder-getNumberOfWrittenBits(); // split bits
 -outBestCU-m_totalCost  =
 m_rdCost-calcRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits);
 +if (m_rdCost-psyRdEnabled())
 +{
 +outBestCU-m_totalCost =
 m_rdCost-calcPsyRdCost(outBestCU-m_totalDistortion,
 outBestCU-m_totalBits, outBestCU-m_psyCost);


If our objective is to catch unintended comparisons between psyCost and
regular RD cost, you should save the above cost (with psyRd enabled) into
m_psyCost.

+}
 +else
 +{
 +outBestCU-m_totalCost =
 m_rdCost-calcRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits);
 +}
  }

  outTempCU-initEstData(depth);
 @@ -648,7 +655,14 @@
  m_entropyCoder-encodeSplitFlag(outTempCU, 0, depth);
  outTempCU-m_totalBits +=
 m_entropyCoder-getNumberOfWrittenBits(); // split bits
  }
 -outTempCU-m_totalCost =
 m_rdCost-calcRdCost(outTempCU-m_totalDistortion, outTempCU-m_totalBits);
 +if (m_rdCost-psyRdEnabled())
 +{
 +  

Re: [x265] [PATCH] param: log CBR if vbvMaxrate, abr bitrate and vbv bufsize are equal

2014-06-05 Thread Deepthi Nandakumar
CBR is defined as the case where vbvMaxRate equals the bitrate; the buffer
size need not necessarily be 1 second.


On Thu, Jun 5, 2014 at 12:14 PM, aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1401950608 -19800
 #  Thu Jun 05 12:13:28 2014 +0530
 # Node ID 21c0d24fe9f6b8c20115b64f36c95e184b7aa78a
 # Parent  817c25f5ace136c60100dabb128dda6056c86bfb
 param: log CBR if vbvMaxrate, abr bitrate and vbv bufsize are equal

 diff -r 817c25f5ace1 -r 21c0d24fe9f6 source/common/param.cpp
 --- a/source/common/param.cpp   Thu Jun 05 11:43:17 2014 +0530
 +++ b/source/common/param.cpp   Thu Jun 05 12:13:28 2014 +0530
 @@ -1130,7 +1130,11 @@
  else switch (param-rc.rateControlMode)
  {
  case X265_RC_ABR:
 -x265_log(param, X265_LOG_INFO, Rate Control / AQ-Strength /
 CUTree : ABR-%d kbps / %0.1f / %d\n, param-rc.bitrate,
 +if (param-rc.vbvMaxBitrate == param-rc.bitrate 
 param-rc.vbvMaxBitrate == param-rc.vbvBufferSize)
 +x265_log(param, X265_LOG_INFO, Rate Control / AQ-Strength /
 CUTree : CBR-%d kbps / %0.1f / %d\n, param-rc.bitrate,
 + param-rc.aqStrength, param-rc.cuTree);
 +else
 +x265_log(param, X265_LOG_INFO, Rate Control / AQ-Strength /
 CUTree : ABR-%d kbps / %0.1f / %d\n, param-rc.bitrate,
   param-rc.aqStrength, param-rc.cuTree);
  break;
  case X265_RC_CQP:
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2 of 6] rc: define default setting and validations for 2 pass states

2014-06-15 Thread Deepthi Nandakumar
On Mon, Jun 16, 2014 at 12:20 AM, aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalaiaar...@multicorewareinc.com
 # Date 1402857077 -19800
 #  Mon Jun 16 00:01:17 2014 +0530
 # Node ID d1f636a6dc0a11657bb25be650de60345c2952c8
 # Parent  438a03ff94830fbc17457b6f324397e643c17cba
 rc: define default setting and validations for 2 pass states

 diff -r 438a03ff9483 -r d1f636a6dc0a source/common/param.cpp
 --- a/source/common/param.cpp   Sun Jun 15 23:59:14 2014 +0530
 +++ b/source/common/param.cpp   Mon Jun 16 00:01:17 2014 +0530
 @@ -180,7 +180,12 @@
  param-rc.cuTree = 1;
  param-rc.rfConstantMax = 0;
  param-rc.rfConstantMin = 0;
 -
 +param-rc.pszStatIn = x265_2pass.log;
 +param-rc.pszStatOut = x264_2pass.log;
 +param-rc.complexityBlur = 20;
 +param-rc.qblur = 0.5;
 +param-rc.statRead = false;
 +param-rc.statWrite = false;
  /* Quality Measurement Metrics */
  param-bEnablePsnr = 0;
  param-bEnableSsim = 0;
 @@ -414,6 +419,18 @@

  return 0;
  }
 +extern C
 +void x265_param_apply_fastfirstpass(x265_param *param)
 +{
 +/* Set faster options in case of turbo firstpass. */
 +if( param-rc.statWrite  !param-rc.statRead )
 +{
 +param-maxNumReferences = 1;
 +param-searchMethod = X265_DIA_SEARCH;
 +param-bEnableEarlySkip = 1;
 +}

/
Hmm, making 2-pass work correctly is the priority. Can we focus on these turbo
first-pass enhancements later? This could affect bitrate predictions.


 +}
 +

  static int x265_atobool(const char *str, bool bError)
  {
 @@ -911,6 +928,8 @@
  if (s)
  x265_log(param, X265_LOG_WARNING, --tune %s should be used
 if attempting to benchmark %s!\n, s, s);
  }
 +if (param-bOpenGOP  param-rc.statRead)
 +param-lookaheadDepth = 0;

  CHECK(param-rc.qp  -6 * (param-internalBitDepth - 8) ||
 param-rc.qp  51,
QP exceeds supported range (-QpBDOffsety to 51));
 @@ -966,7 +985,7 @@
Rate control mode is out of range);
  CHECK(param-rdLevel  0 || param-rdLevel  6,
RD Level is out of range);
 -CHECK(param-bframes  param-lookaheadDepth,
 +CHECK(param-bframes  param-lookaheadDepth  !param-rc.statRead,
Lookahead depth must be greater than the max consecutive
 bframe count);
  CHECK(param-bframes  0,
bframe count should be greater than zero);
 @@ -1045,6 +1064,9 @@
Target bitrate can not be less than zero);
  if (param-noiseReduction)
  CHECK(100  param-noiseReduction || param-noiseReduction 
 1000, Valid noise reduction range 100 - 1000);
 +CHECK(param-rc.rateControlMode == X265_RC_CRF  param-rc.statRead,
 +  Constant rate-factor is incompatible with 2pass);
 +


And CQP also?


  return check_failed;
  }

 diff -r 438a03ff9483 -r d1f636a6dc0a source/x265.cpp
 --- a/source/x265.cpp   Sun Jun 15 23:59:14 2014 +0530
 +++ b/source/x265.cpp   Mon Jun 16 00:01:17 2014 +0530
 @@ -591,6 +591,7 @@
  }
  #endif // if HIGH_BIT_DEPTH

 +x265_param_apply_fastfirstpass(param);
  InputFileInfo info;
  info.filename = inputfn;
  info.depth = inputBitDepth;
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2 of 3] lambda: change chroma lambda distortion weighting to resemble x264

2014-06-16 Thread Deepthi Nandakumar
Agreed, that's what I thought too. But the g_chromatable is not in the HEVC
spec at all, it's just carried over from HM.

On Jun 16, 2014 5:38 PM, Aarthi Priya Thirumalai 
aar...@multicorewareinc.com wrote:




 On Mon, Jun 16, 2014 at 4:49 PM, deep...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Deepthi Nandakumar deep...@multicorewareinc.com
 # Date 1402916716 -19800
 #  Mon Jun 16 16:35:16 2014 +0530
 # Node ID 4d76a9c8b5abbf143e5869d55cf80a8816d99a68
 # Parent  ff3a85f715d43e2c21aec295426ae9dbe7c03d75
 lambda: change chroma lambda distortion weighting to resemble x264.

 1. x264 scales the chroma distortion by a factor derived from a lambda
offset table
 when psyRd is enabled.

 2. This patch also removes the separate Cb and Cr distortion weights
that were carried over from HM,
 and replaces it with 256 when psy-rd is disabled, and the
above-mentioned lambda offset when it is enabled.

 diff -r ff3a85f715d4 -r 4d76a9c8b5ab
source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 16 16:12:00 2014
+0530
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 16 16:35:16 2014
+0530
 @@ -143,16 +143,22 @@
  return false;
  }

 -void TEncSearch::setQP(int qp, double crWeight, double cbWeight)
 +void TEncSearch::setQP(int qp)
  {
 -double lambda2 = x265_lambda2_tab[qp];
 -double chromaLambda = lambda2 / crWeight;
 +double lambda2 = x265_lambda2_tab[qp];
 +
 +#define SPEC_QP(x) X265_MIN(x, QP_MAX_SPEC)


 +int effective_chroma_qp = chroma_qp_table[SPEC_QP(qp)] +
X265_MAX(qp - QP_MAX_SPEC, 0);
 +double chromaLambda = x265_lambda2_tab[effective_chroma_qp];
 +int chroma_offset_idx = X265_MIN (qp - effective_chroma_qp + 12,
MAX_CHROMA_LAMBDA_OFFSET);
 +uint64_t chromaWeight = m_rdCost-psyRdEnabled() ?
x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
 +#undef SPEC_QP


 Luma to chroma qp mapping should be done as specified in the HEVC
standard. The array chroma_qp_table[] is written according to the H.264
spec. We can't use that for HEVC.
 g_chromaScale[chFmt][qpc] gives the correct mapping of chroma qp as
per the HEVC spec. Chroma qp needs to be obtained from luma qp (0-69) before it
can be clipped to QP_MAX_SPEC for HEVC.



  m_me.setQP(qp);
  m_trQuant-setLambda(lambda2, chromaLambda);
  m_rdCost-setLambda(lambda2, x265_lambda_tab[qp]);
 -m_rdCost-setCbDistortionWeight(cbWeight);
 -m_rdCost-setCrDistortionWeight(crWeight);
 +m_rdCost-setCbDistortionWeight(chromaWeight);
 +m_rdCost-setCrDistortionWeight(chromaWeight);
  }

  void TEncSearch::xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth,
uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t
height, bool bLuma, bool bChroma)
 diff -r ff3a85f715d4 -r 4d76a9c8b5ab source/Lib/TLibEncoder/TEncSearch.h
 --- a/source/Lib/TLibEncoder/TEncSearch.h   Mon Jun 16 16:12:00 2014
+0530
 +++ b/source/Lib/TLibEncoder/TEncSearch.h   Mon Jun 16 16:35:16 2014
+0530
 @@ -142,7 +142,7 @@

  void setRDGoOnSbacCoder(TEncSbac* rdGoOnSbacCoder) {
m_rdGoOnSbacCoder = rdGoOnSbacCoder; }

 -void setQP(int QP, double crWeight, double cbWeight);
 +void setQP(int QP);

  TEncSearch();
  virtual ~TEncSearch();
 diff -r ff3a85f715d4 -r 4d76a9c8b5ab source/encoder/frameencoder.cpp
 --- a/source/encoder/frameencoder.cpp   Mon Jun 16 16:12:00 2014 +0530
 +++ b/source/encoder/frameencoder.cpp   Mon Jun 16 16:35:16 2014 +0530
 @@ -363,22 +363,8 @@
  }

  void FrameEncoder::setLambda(int qp, int row)
 -{
 -TComSlice*  slice = m_pic-getSlice();
 -int chFmt = slice-getSPS()-getChromaFormatIdc();
 -
 -// for RDO
 -// in RdCost there is only one lambda because the luma and chroma
bits are not separated,
 -// instead we weight the distortion of chroma.
 -int chromaQPOffset = slice-getPPS()-getChromaCbQpOffset() +
slice-getSliceQpDeltaCb();
 -int qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
 -double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0);
// takes into account of the chroma qp mapping and chroma qp Offset
 -
 -chromaQPOffset = slice-getPPS()-getChromaCrQpOffset() +
slice-getSliceQpDeltaCr();
 -qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
 -double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0);
// takes into account of the chroma qp mapping and chroma qp Offset
 -
 -m_rows[row].m_search.setQP(qp, crWeight, cbWeight);
 +{
 +m_rows[row].m_search.setQP(qp);
  }

  void FrameEncoder::compressFrame()
 @@ -387,7 +373,6 @@
  int64_t  startCompressTime = x265_mdate();
  TEncEntropy* entropyCoder  = getEntropyCoder(0);
  TComSlice*   slice = m_pic-getSlice();
 -int  chFmt =
slice-getSPS()-getChromaFormatIdc();
  int  totalCoded= (int)m_top-m_encodedFrameNum - 1;

  m_nalCount = 0;
 @@ -515,21 +500,13 @@
  }

  int qp = slice-getSliceQp

Re: [x265] [PATCH 2 of 3] lambda: change chroma lambda distortion weighting to resemble x264

2014-06-16 Thread Deepthi Nandakumar
Ok, thanks. The g_chromaScale table is likely constructed from the H.265
range extension spec, so I was looking in the wrong place. So, the QPs will
be reverted but the chroma lambda changes will still hold. New patch in the
works.


On Mon, Jun 16, 2014 at 8:10 PM, Aarthi Priya Thirumalai 
aar...@multicorewareinc.com wrote:

 pg 155 of HEVC standard:
 Table 8-9 – Specification of QpC as a function of qPi
 qPi  < 30  30 31 32 33 34 35 36 37 38 39 40 41 42 43  > 43
 QpC  = qPi 29 30 31 32 33 33 34 34 35 35 36 36 37 37  = qPi − 6

 AFAIK, the values in g_chromaScale[CHROMA_420] follow the spec exactly. Not
 sure how the values for other chroma formats were derived.


 On Mon, Jun 16, 2014 at 7:42 PM, Deepthi Nandakumar 
 deep...@multicorewareinc.com wrote:

 Agreed, that's what I thought too. But the g_chromaScale table is not in the
 HEVC spec at all; it's just carried over from HM.

 On Jun 16, 2014 5:38 PM, Aarthi Priya Thirumalai 
 aar...@multicorewareinc.com wrote:
 
 
 
 
  On Mon, Jun 16, 2014 at 4:49 PM, deep...@multicorewareinc.com wrote:
 
  # HG changeset patch
  # User Deepthi Nandakumar deep...@multicorewareinc.com
  # Date 1402916716 -19800
  #  Mon Jun 16 16:35:16 2014 +0530
  # Node ID 4d76a9c8b5abbf143e5869d55cf80a8816d99a68
  # Parent  ff3a85f715d43e2c21aec295426ae9dbe7c03d75
  lambda: change chroma lambda distortion weighting to resemble x264.
 
  1. x264 scales the chroma distortion by a factor derived from a lambda
 offset table
  when psyRd is enabled.
 
  2. This patch also removes the separate Cb and Cr distortion weights
 that were carried over from HM,
  and replaces it with 256 when psy-rd is disabled, and the
 above-mentioned lambda offset when it is enabled.
 
  diff -r ff3a85f715d4 -r 4d76a9c8b5ab
 source/Lib/TLibEncoder/TEncSearch.cpp
  --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 16 16:12:00
 2014 +0530
  +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 16 16:35:16
 2014 +0530
  @@ -143,16 +143,22 @@
   return false;
   }
 
  -void TEncSearch::setQP(int qp, double crWeight, double cbWeight)
  +void TEncSearch::setQP(int qp)
   {
  -double lambda2 = x265_lambda2_tab[qp];
  -double chromaLambda = lambda2 / crWeight;
  +double lambda2 = x265_lambda2_tab[qp];
  +
  +#define SPEC_QP(x) X265_MIN(x, QP_MAX_SPEC)
 
 
  +int effective_chroma_qp = chroma_qp_table[SPEC_QP(qp)] +
 X265_MAX(qp - QP_MAX_SPEC, 0);
  +double chromaLambda = x265_lambda2_tab[effective_chroma_qp];
  +int chroma_offset_idx = X265_MIN (qp - effective_chroma_qp + 12,
 MAX_CHROMA_LAMBDA_OFFSET);
  +uint64_t chromaWeight = m_rdCost-psyRdEnabled() ?
 x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
  +#undef SPEC_QP
 
 
  Luma to chroma QP mapping should be done as specified in the HEVC
 standard. The array chroma_qp_table[] is written according to the H.264
 spec, so we can't use it for HEVC.
  g_chromaScale[chFmt][qpc] gives the correct mapping of chroma QP as
 per the HEVC spec. Chroma QP needs to be obtained from luma QP (0-69) before it
 can be clipped to QP_MAX_SPEC for HEVC.
 
 
 
   m_me.setQP(qp);
   m_trQuant-setLambda(lambda2, chromaLambda);
   m_rdCost-setLambda(lambda2, x265_lambda_tab[qp]);
  -m_rdCost-setCbDistortionWeight(cbWeight);
  -m_rdCost-setCrDistortionWeight(crWeight);
  +m_rdCost-setCbDistortionWeight(chromaWeight);
  +m_rdCost-setCrDistortionWeight(chromaWeight);
   }
 
   void TEncSearch::xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t
 height, bool bLuma, bool bChroma)
  diff -r ff3a85f715d4 -r 4d76a9c8b5ab
 source/Lib/TLibEncoder/TEncSearch.h
  --- a/source/Lib/TLibEncoder/TEncSearch.h   Mon Jun 16 16:12:00
 2014 +0530
  +++ b/source/Lib/TLibEncoder/TEncSearch.h   Mon Jun 16 16:35:16
 2014 +0530
  @@ -142,7 +142,7 @@
 
   void setRDGoOnSbacCoder(TEncSbac* rdGoOnSbacCoder) {
 m_rdGoOnSbacCoder = rdGoOnSbacCoder; }
 
  -void setQP(int QP, double crWeight, double cbWeight);
  +void setQP(int QP);
 
   TEncSearch();
   virtual ~TEncSearch();
  diff -r ff3a85f715d4 -r 4d76a9c8b5ab source/encoder/frameencoder.cpp
  --- a/source/encoder/frameencoder.cpp   Mon Jun 16 16:12:00 2014 +0530
  +++ b/source/encoder/frameencoder.cpp   Mon Jun 16 16:35:16 2014 +0530
  @@ -363,22 +363,8 @@
   }
 
   void FrameEncoder::setLambda(int qp, int row)
  -{
  -TComSlice*  slice = m_pic-getSlice();
  -int chFmt = slice-getSPS()-getChromaFormatIdc();
  -
  -// for RDO
  -// in RdCost there is only one lambda because the luma and chroma
 bits are not separated,
  -// instead we weight the distortion of chroma.
  -int chromaQPOffset = slice-getPPS()-getChromaCbQpOffset() +
 slice-getSliceQpDeltaCb();
  -int qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
  -double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) /
 3.0); // takes into account of the chroma qp mapping and chroma qp Offset

Re: [x265] [PATCH] rc: introduce param variables for 2 pass

2014-06-19 Thread Deepthi Nandakumar
Need to bump up X265_BUILD


On Tue, Jun 17, 2014 at 8:06 PM, aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalaiaar...@multicorewareinc.com
 # Date 1402997162 -19800
 #  Tue Jun 17 14:56:02 2014 +0530
 # Node ID eb3b6d39b83102dfc7c665e3055c1ffda3322e3e
 # Parent  3a19a9fdb103979e65a9daf15c46c0735e8d743e
 rc: introduce param variables for 2 pass

 diff -r 3a19a9fdb103 -r eb3b6d39b831 source/x265.h
 --- a/source/x265.h Tue Jun 17 14:07:26 2014 +0530
 +++ b/source/x265.h Tue Jun 17 14:56:02 2014 +0530
 @@ -742,11 +742,25 @@

  /* In CRF mode, maximum CRF as caused by VBV. 0 implies no limit
 */
  doublerfConstantMax;
 -
  /* In CRF mode, minimum CRF as caused by VBV */
  doublerfConstantMin;
 +
 +/* 2pass */
 +
 +/* Filename of the 2pass output stats file. It's null during the
 final pass when
 + * stats file isn't written.*/
 +char*statOutFileName;
 +
 +/* Filename of the 2pass input stats file that is used to load
 the data for subsequent passes.
 + * It's null during the first pass of a multipass encode. */
 +char*statInFileName;
 +
 +/* temporally blur quants */
 +double   qblur;
 +
 +/* temporally blur complexity */
 +floatcomplexityBlur;
  } rc;
 -
  /*== Video Usability Information ==*/
  struct
  {
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled

2014-06-25 Thread Deepthi Nandakumar
This is primarily a visual quality improvement/psy-rd hack. In 444, since
chroma resolution is on par with luma, and our eyes aren't very sensitive to
chroma, we increase the chroma QP so that those bits can be used up in
luma.


On Wed, Jun 25, 2014 at 4:35 PM, Derek Buitenhuis 
derek.buitenh...@gmail.com wrote:

 On 6/25/2014 1:22 AM, deep...@multicorewareinc.com wrote:
  +/* In 444, chroma gets twice as much resolution, so halve quality
 when psy-rd is enabled */
  +if (p-internalCsp == X265_CSP_I444  p-psyRd)
  +{
  +p-cbQpOffset += 6;
  +p-crQpOffset += 6;
  +}

 I don't really understand the reasoning for this. Is it just to
 make it
 fit with the model psy-rd is currently using?

 - Derek
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled

2014-06-25 Thread Deepthi Nandakumar
In a sense, psy-rd encapsulates all those r-d algorithms/tweaks/hacks that
improve visual quality but may hurt objective metrics like psnr/ssim.
In 444, this qp hack is likely to hurt objective metrics, hence it's turned
on only if psychovisual improvement is desired.
On Jun 25, 2014 7:02 PM, Derek Buitenhuis derek.buitenh...@gmail.com
wrote:

 On 6/25/2014 12:10 PM, Deepthi Nandakumar wrote:
  This is primarily a visual quality improvement/psy-rd hack. In 444,
 since chroma resolution is on par with luma, and our eyes aren't very
 sensitive to chroma, we increase the chroma QP so that those bits can be
 used up in luma.

 Yah I get the idea of a chroma qp offset, I'm just wondering why it is
 specific to psy-rd?

 Cheers,
 - Derek
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2 of 2] pass TLD into class FrameFilter

2014-06-25 Thread Deepthi Nandakumar
On Tue, Jun 24, 2014 at 5:36 AM, Min Chen chenm...@163.com wrote:

 # HG changeset patch
 # User Min Chen chenm...@163.com
 # Date 1403568362 25200
 # Node ID efa48bc0245bded1418db3c42b042acb9969146c
 # Parent  12c1d8aaa8f56a8f2de74c8ff1451d99d04c817d
 pass TLD into class FrameFilter

 diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/cturow.h
 --- a/source/encoder/cturow.h   Mon Jun 23 17:03:49 2014 -0700
 +++ b/source/encoder/cturow.h   Mon Jun 23 17:06:02 2014 -0700
 @@ -47,6 +47,10 @@
  RDCost  m_rdCost;
  TComTrQuant m_trQuant;

 +// NOTE: the maximum LCU 64x64 have 256 partitions
 +boolm_edgeFilter[256];
 +uint8_t m_blockingStrength[256];
 +
  void init(Encoder);
  ~ThreadLocalData();
  };
 diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/encoder.cpp
 --- a/source/encoder/encoder.cppMon Jun 23 17:03:49 2014 -0700
 +++ b/source/encoder/encoder.cppMon Jun 23 17:06:02 2014 -0700
 @@ -42,6 +42,7 @@
  #include x265.h

  using namespace x265;
 +ThreadLocalData* Encoder::m_threadLocalData;

  Encoder::Encoder()
  {
 @@ -194,9 +195,10 @@
  if (m_frameEncoder)
  {
  int numRows = (m_param-sourceHeight + g_maxCUSize - 1) /
 g_maxCUSize;
 +int numCols = (m_param-sourceWidth  + g_maxCUSize - 1) /
 g_maxCUSize;
  for (int i = 0; i  m_param-frameNumThreads; i++)
  {
 -if (!m_frameEncoder[i].init(this, numRows))
 +if (!m_frameEncoder[i].init(this, numRows, numCols))
  {
  x265_log(m_param, X265_LOG_ERROR, Unable to initialize
 frame encoder, aborting\n);
  m_aborted = true;
 diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/encoder.h
 --- a/source/encoder/encoder.h  Mon Jun 23 17:03:49 2014 -0700
 +++ b/source/encoder/encoder.h  Mon Jun 23 17:06:02 2014 -0700
 @@ -175,7 +175,7 @@

  x265_param*m_param;
  RateControl*   m_rateControl;
 -ThreadLocalData*   m_threadLocalData;
 +static ThreadLocalData*   m_threadLocalData;

  bool   m_bEnableRDOQ;

 diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/frameencoder.cpp
 --- a/source/encoder/frameencoder.cpp   Mon Jun 23 17:03:49 2014 -0700
 +++ b/source/encoder/frameencoder.cpp   Mon Jun 23 17:06:02 2014 -0700
 @@ -80,15 +80,17 @@
  stop();
  }

 -bool FrameEncoder::init(Encoder *top, int numRows)
 +bool FrameEncoder::init(Encoder *top, int numRows, int numCols)
  {
  bool ok = true;

  m_top = top;
  m_param = top-m_param;
  m_numRows = numRows;
 +m_numCols = numCols;
  m_filterRowDelay = (m_param-saoLcuBasedOptimization 
 m_param-saoLcuBoundary) ?
  2 : (m_param-bEnableSAO || m_param-bEnableLoopFilter ? 1 : 0);
 +m_filterRowDelayCus = m_filterRowDelay * numCols;

  m_rows = new CTURow[m_numRows];
  for (int i = 0; i  m_numRows; ++i)
 @@ -505,7 +507,7 @@
  // Extend border after whole-frame SAO is finished
  for (int row = 0; row  m_numRows; row++)
  {
 -m_frameFilter.processRowPost(row);
 +m_frameFilter.processRowPost(row, 0);
  }
  }

 @@ -845,7 +847,7 @@
  }

  // setup thread-local data
 -ThreadLocalData tld = threadId = 0 ?
 m_top-m_threadLocalData[threadId] : m_tld;
 +ThreadLocalData tld = threadId = 0 ?
 Encoder::m_threadLocalData[threadId] : m_tld;
  tld.m_trQuant.m_nr = m_nr;
  tld.m_search.m_mref = m_mref;
  codeRow.setThreadLocalData(tld);
 @@ -856,7 +858,8 @@
  tld.m_cuCoder.m_log =
 tld.m_cuCoder.m_sliceTypeLog[m_frame-getSlice()-getSliceType()];

  int64_t startTime = x265_mdate();
 -const uint32_t numCols = m_frame-getPicSym()-getFrameWidthInCU();
 +assert(m_frame-getPicSym()-getFrameWidthInCU() == m_numCols);
 +const uint32_t numCols = m_numCols;
  const uint32_t lineStartCUAddr = row * numCols;
  bool bIsVbv = m_param-rc.vbvBufferSize  0 
 m_param-rc.vbvMaxBitrate  0;

 diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/frameencoder.h
 --- a/source/encoder/frameencoder.h Mon Jun 23 17:03:49 2014 -0700
 +++ b/source/encoder/frameencoder.h Mon Jun 23 17:06:02 2014 -0700
 @@ -65,15 +65,15 @@

  void setThreadPool(ThreadPool *p);

 -bool init(Encoder *top, int numRows);
 +bool init(Encoder *top, int numRows, int numCols);

  void destroy();

  void processRowEncoder(int row, const int threadId);

 -void processRowFilter(int row)
 +void processRowFilter(int row, const int threadId)
  {
 -m_frameFilter.processRow(row);
 +m_frameFilter.processRow(row, threadId);
  }

  void enqueueRowEncoder(int row)
 @@ -108,7 +108,7 @@
  }
  else
  {
 -processRowFilter(realRow);
 +processRowFilter(realRow, threadId);

  // NOTE: Active next row
  if (realRow != m_numRows - 1)
 @@ -154,6 +154,7 @@
  bool 

Re: [x265] [PATCH 2 of 3] rc: accumulate mv bits, coeff bits per frame

2014-06-26 Thread Deepthi Nandakumar
Pls fix extra newlines and whitespace nits.


On Wed, Jun 25, 2014 at 10:54 PM, aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalaiaar...@multicorewareinc.com
 # Date 1403716735 -19800
 #  Wed Jun 25 22:48:55 2014 +0530
 # Node ID 0995efabd44470c1192994e1aceeb40ae606467f
 # Parent  e71e34d02de228eab43edf1910a71a44417d
 rc: accumulate mv bits, coeff bits per frame.

 diff -r e71e34d02d77 -r 0995efabd444 source/Lib/TLibEncoder/TEncCu.cpp
 --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Jun 25 22:46:45 2014 +0530
 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Wed Jun 25 22:48:55 2014 +0530
 @@ -555,6 +555,7 @@
  m_entropyCoder-resetBits();
  m_entropyCoder-encodeSplitFlag(outBestCU, 0, depth);
  outBestCU-m_totalBits +=
 m_entropyCoder-getNumberOfWrittenBits(); // split bits
 +
  if (m_rdCost-psyRdEnabled())
  {
  outBestCU-m_totalPsyCost =
 m_rdCost-calcPsyRdCost(outBestCU-m_totalDistortion,
 outBestCU-m_totalBits,
 @@ -616,7 +617,7 @@
  m_entropyCoder-encodeSplitFlag(outTempCU, 0, depth);
  outTempCU-m_totalBits +=
 m_entropyCoder-getNumberOfWrittenBits(); // split bits
  }
 -
 +
  if (m_rdCost-psyRdEnabled())
  {
  outTempCU-m_totalPsyCost =
 m_rdCost-calcPsyRdCost(outTempCU-m_totalDistortion,
 outTempCU-m_totalBits,
 @@ -907,6 +908,7 @@
  m_entropyCoder-resetBits();
  m_entropyCoder-encodeSplitFlag(outBestCU, 0, depth);
  outBestCU-m_totalBits +=
 m_entropyCoder-getNumberOfWrittenBits(); // split bits
 +
  if (m_rdCost-psyRdEnabled())
  {
  outBestCU-m_totalPsyCost =
 m_rdCost-calcPsyRdCost(outBestCU-m_totalDistortion,
 outBestCU-m_totalBits,
 @@ -1168,6 +1170,7 @@
  return;
  }

 +
  if (slice-getPPS()-getTransquantBypassEnableFlag())
  {
  m_entropyCoder-encodeCUTransquantBypassFlag(cu, absPartIdx);
 @@ -1390,12 +1393,14 @@
  m_entropyCoder-encodePartSize(outTempCU, 0, depth);
  m_entropyCoder-encodePredInfo(outTempCU, 0);
  m_entropyCoder-encodeIPCMInfo(outTempCU, 0);
 +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits();

  // Encode Coefficients
  bool bCodeDQP = getdQPFlag();
  m_entropyCoder-encodeCoeff(outTempCU, 0, depth,
 outTempCU-getCUSize(0), bCodeDQP);
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
  outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits();
 +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits;

  if (m_rdCost-psyRdEnabled())
  {
 @@ -1437,12 +1442,13 @@
  m_entropyCoder-encodePartSize(outTempCU, 0, depth);
  m_entropyCoder-encodePredInfo(outTempCU, 0);
  m_entropyCoder-encodeIPCMInfo(outTempCU, 0);
 -
 +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits();
  // Encode Coefficients
  bool bCodeDQP = getdQPFlag();
  m_entropyCoder-encodeCoeff(outTempCU, 0, depth,
 outTempCU-getCUSize(0), bCodeDQP);
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
  outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits();
 +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits;

  if (m_rdCost-psyRdEnabled())
  {
 @@ -1492,12 +1498,14 @@
  m_entropyCoder-encodeSkipFlag(outTempCU, 0);
  m_entropyCoder-encodePredMode(outTempCU, 0);
  m_entropyCoder-encodePartSize(outTempCU, 0, depth);
 +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits();
  m_entropyCoder-encodeIPCMInfo(outTempCU, 0);

  m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);

  outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits();
  outTempCU-m_totalRDCost =
 m_rdCost-calcRdCost(outTempCU-m_totalDistortion, outTempCU-m_totalBits);
 +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits;

  xCheckDQP(outTempCU);
  xCheckBestMode(outBestCU, outTempCU, depth);
 diff -r e71e34d02d77 -r 0995efabd444 source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jun 25 22:46:45 2014
 +0530
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jun 25 22:48:55 2014
 +0530
 @@ -4211,6 +4211,7 @@
  }
  m_entropyCoder-encodeSkipFlag(cu, 0);
  m_entropyCoder-encodeMergeIndex(cu, 0);
 +cu-m_mvBits = m_entropyCoder-getNumberOfWrittenBits();
  return m_entropyCoder-getNumberOfWrittenBits();
  }
  else
 @@ -4225,8 +4226,11 @@
  m_entropyCoder-encodePartSize(cu, 0, cu-getDepth(0));
  m_entropyCoder-encodePredInfo(cu, 0);
  bool bDummy = false;
 +cu-m_mvBits = m_entropyCoder-getNumberOfWrittenBits();
  m_entropyCoder-encodeCoeff(cu, 0, cu-getDepth(0),
 cu-getCUSize(0), bDummy);
 -return m_entropyCoder-getNumberOfWrittenBits();
 +int totalBits = m_entropyCoder-getNumberOfWrittenBits();
 +cu-m_coeffBits = totalBits - 

Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled

2014-06-26 Thread Deepthi Nandakumar
OK, we don't have a separate psy option yet.
On Jun 26, 2014 6:00 PM, Derek Buitenhuis derek.buitenh...@gmail.com
wrote:

 On 6/26/2014 6:35 AM, BugMaster wrote:
  That is a separate --psy (--no-psy) option in x264, not --psy-rd

 Yeah, that was my point. :)

 - Derek
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] RDOQ enabled rdLevel changed

2014-06-26 Thread Deepthi Nandakumar
Yes, the rest of the patch added support for lower rd levels.


On Fri, Jun 27, 2014 at 6:17 AM, Satoshi Nakagawa nakagawa...@oki.com
wrote:


 Is this change intended?

  # HG changeset patch
  # User Sumalatha Polureddysumala...@multicorewareinc.com
  # Date 1403689018 -19800
  #  Wed Jun 25 15:06:58 2014 +0530
  # Node ID e2ed009d296af39926d79f1a245974f158d6861a
  # Parent  5797d6a8197c96b68752705167ced6cb63194013
  psy-rd: implement psy-rd in rdlevel=4,3 and 2

  diff -r 5797d6a8197c -r e2ed009d296a source/encoder/encoder.cpp
  --- a/source/encoder/encoder.cpp  Wed Jun 25 18:21:34 2014 +0530
  +++ b/source/encoder/encoder.cpp  Wed Jun 25 15:06:58 2014 +0530
  @@ -1267,7 +1267,7 @@
   }
 
   // disable RDOQ if psy-rd is enabled; until we make it psy-aware
  -m_bEnableRDOQ = p-psyRd == 0.0  p-rdLevel = 4;
  +m_bEnableRDOQ = p-psyRd == 0.0  p-rdLevel = 1;
 
   if (p-bLossless)
   {
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2 of 3] rc: accumulate mv bits, coeff bits per frame

2014-06-27 Thread Deepthi Nandakumar
Thanks, can you merge with the latest tip?


On Fri, Jun 27, 2014 at 12:13 AM, Aarthi Priya Thirumalai 
aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalaiaar...@multicorewareinc.com
 # Date 1403808050 -19800
 #  Fri Jun 27 00:10:50 2014 +0530
 # Node ID 11ddf73017d44933090a8943f4fc5098b231b56d
 # Parent  1b669c33ff3a8d8f6c9bd1e18979c009baed2433
 rc: accumulate mv bits, coeff bits per frame

 diff -r 1b669c33ff3a -r 11ddf73017d4 source/Lib/TLibEncoder/TEncCu.cpp
  --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Jun 25 22:46:45 2014 +0530
 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Fri Jun 27 00:10:50 2014 +0530
 @@ -1365,12 +1365,14 @@
  m_entropyCoder-encodePredMode(outTempCU, 0);
  m_entropyCoder-encodePartSize(outTempCU, 0, depth);
  m_entropyCoder-encodePredInfo(outTempCU, 0);
  +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits();

  // Encode Coefficients
  bool bCodeDQP = getdQPFlag();
  m_entropyCoder-encodeCoeff(outTempCU, 0, depth,
 outTempCU-getCUSize(0), bCodeDQP);
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
  outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits();
 +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits;

  if (m_rdCost-psyRdEnabled())
  {
 @@ -1411,12 +1413,14 @@
  m_entropyCoder-encodePredMode(outTempCU, 0);
  m_entropyCoder-encodePartSize(outTempCU, 0, depth);
  m_entropyCoder-encodePredInfo(outTempCU, 0);
 +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits();

  // Encode Coefficients
  bool bCodeDQP = getdQPFlag();
  m_entropyCoder-encodeCoeff(outTempCU, 0, depth,
 outTempCU-getCUSize(0), bCodeDQP);
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
  outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits();
 +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits;

  if (m_rdCost-psyRdEnabled())
  {
 diff -r 1b669c33ff3a -r 11ddf73017d4 source/Lib/TLibEncoder/TEncSearch.cpp
  --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jun 25 22:46:45 2014
 +0530
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Jun 27 00:10:50 2014 +0530
 @@ -4059,6 +4059,7 @@
  }
  m_entropyCoder-encodeSkipFlag(cu, 0);
  m_entropyCoder-encodeMergeIndex(cu, 0);
 +cu-m_mvBits = m_entropyCoder-getNumberOfWrittenBits();
  return m_entropyCoder-getNumberOfWrittenBits();
  }
  else
 @@ -4073,8 +4074,11 @@
  m_entropyCoder-encodePartSize(cu, 0, cu-getDepth(0));
  m_entropyCoder-encodePredInfo(cu, 0);
  bool bDummy = false;
 +cu-m_mvBits = m_entropyCoder-getNumberOfWrittenBits();
  m_entropyCoder-encodeCoeff(cu, 0, cu-getDepth(0),
 cu-getCUSize(0), bDummy);
 -return m_entropyCoder-getNumberOfWrittenBits();
 +int totalBits = m_entropyCoder-getNumberOfWrittenBits();
 +cu-m_coeffBits = totalBits - cu-m_mvBits;
  +return totalBits;
  }
  }

 diff -r 1b669c33ff3a -r 11ddf73017d4 source/encoder/compress.cpp
 --- a/source/encoder/compress.cpp Wed Jun 25 22:46:45 2014 +0530
 +++ b/source/encoder/compress.cpp Fri Jun 27 00:10:50 2014 +0530
 @@ -63,6 +63,7 @@
  m_entropyCoder-encodePredMode(cu, 0);
  m_entropyCoder-encodePartSize(cu, 0, depth);
  m_entropyCoder-encodePredInfo(cu, 0);
 +cu-m_mvBits += m_entropyCoder-getNumberOfWrittenBits();

  // Encode Coefficients
  bool bCodeDQP = getdQPFlag();
 @@ -71,6 +72,7 @@
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);

  cu-m_totalBits = m_entropyCoder-getNumberOfWrittenBits();
 +cu-m_coeffBits = cu-m_totalBits - cu-m_mvBits;
  if (m_rdCost-psyRdEnabled())
  {
  int part = g_convertToBit[cu-getCUSize(0)];
 diff -r 1b669c33ff3a -r 11ddf73017d4 source/encoder/frameencoder.cpp
 --- a/source/encoder/frameencoder.cpp Wed Jun 25 22:46:45 2014 +0530
 +++ b/source/encoder/frameencoder.cpp Fri Jun 27 00:10:50 2014 +0530
 @@ -694,6 +694,11 @@
  // Store probabilities of second LCU in line into buffer
  if (col == 1  m_param-bEnableWavefront)
  getBufferSBac(lin)-loadContexts(getSbacCoder(subStrm));
 +
 +// Collect Frame Stats for 2 pass
 +m_frame-m_stats.mvBits += cu-m_mvBits;
 +m_frame-m_stats.coeffBits += cu-m_coeffBits;
 +m_frame-m_stats.miscBits += cu-m_totalBits - (cu-m_mvBits +
 cu-m_coeffBits);
  }

  if (slice-getPPS()-getCabacInitPresentFlag())


 On Thu, Jun 26, 2014 at 4:25 PM, Deepthi Nandakumar 
 deep...@multicorewareinc.com wrote:

 Pls fix extra newlines and whitespace nits.


 On Wed, Jun 25, 2014 at 10:54 PM, aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalaiaar...@multicorewareinc.com
 # Date 1403716735 -19800
 #  Wed Jun 25 22:48:55 2014 +0530
 # Node ID 0995efabd44470c1192994e1aceeb40ae606467f
 # Parent  e71e34d02de228eab43edf1910a71a44417d
 rc

Re: [x265] [PATCH] framefilter: remove heap corruption in tld

2014-07-02 Thread Deepthi Nandakumar
The framefilter structure needs ThreadLocalData m_tld, that has to be
initialised, and then used if wpp is not enabled. Not sure what you're
trying to do here?


On Wed, Jul 2, 2014 at 2:20 PM, Aarthi Priya Thirumalai 
aar...@multicorewareinc.com wrote:

 Sorry, the Thg client didn't sync the changes properly for the previous commit.
 Here is the right one:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1404290172 -19800
 #  Wed Jul 02 14:06:12 2014 +0530
 # Node ID a5a439242bbf367f5d76356b841cfa1ee9e119e4
 # Parent  a18972fd05b1d6242a881bef979b9e1ff17543d9
 framefilter: remove heap corruption in tld

 diff -r a18972fd05b1 -r a5a439242bbf source/encoder/frameencoder.h
 --- a/source/encoder/frameencoder.h Tue Jul 01 14:58:35 2014 -0500
 +++ b/source/encoder/frameencoder.h Wed Jul 02 14:06:12 2014 +0530
 @@ -171,7 +171,7 @@
  uint32_t m_checksum[3];
  double   m_elapsedCompressTime; // elapsed time spent
 in worker threads
  double   m_frameTime;   // wall time from
 frame start to finish
 -
 +ThreadLocalData  m_tld;
  volatile boolm_bAllRowsStop;
  volatile int m_vbvResetTriggerRow;

 @@ -191,7 +191,6 @@
   Bitstream*   m_outStreams;
  NoiseReduction   m_nr;
  NALList  m_nalList;
 -ThreadLocalData  m_tld;

  Frame*   m_frame;

 diff -r a18972fd05b1 -r a5a439242bbf source/encoder/framefilter.cpp
 --- a/source/encoder/framefilter.cpp Tue Jul 01 14:58:35 2014 -0500
 +++ b/source/encoder/framefilter.cpp Wed Jul 02 14:06:12 2014 +0530
 @@ -124,8 +124,7 @@
  void FrameFilter::processRow(int row, const int threadId)
  {
  PPAScopeEvent(Thread_filterCU);
 -assert(threadId = 0);
 -ThreadLocalData tld = Encoder::m_threadLocalData[threadId];
 +ThreadLocalData tld = threadId = 0 ?
 Encoder::m_threadLocalData[threadId] : m_frame-m_tld;

  if (!m_param-bEnableLoopFilter  !m_param-bEnableSAO)
  {


 On Wed, Jul 2, 2014 at 2:15 PM, aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1404290172 -19800
 #  Wed Jul 02 14:06:12 2014 +0530
 # Node ID 69d9bd3eb5bd015d2e0c90d51eec0d7f8a4747d0
 # Parent  a18972fd05b1d6242a881bef979b9e1ff17543d9
 framefilter: remove heap corruption in tld

 diff -r a18972fd05b1 -r 69d9bd3eb5bd source/encoder/frameencoder.h
 --- a/source/encoder/frameencoder.h Tue Jul 01 14:58:35 2014 -0500
 +++ b/source/encoder/frameencoder.h Wed Jul 02 14:06:12 2014 +0530
 @@ -191,7 +191,6 @@
  Bitstream*   m_outStreams;
  NoiseReduction   m_nr;
  NALList  m_nalList;
 -ThreadLocalData  m_tld;

  Frame*   m_frame;



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] framefilter: remove heap corruption in tld

2014-07-02 Thread Deepthi Nandakumar
Hmm, I'm wondering: doesn't m_tld rightfully belong to Encoder?


On Wed, Jul 2, 2014 at 3:39 PM, Aarthi Priya Thirumalai 
aar...@multicorewareinc.com wrote:


 Exactly that! Make FrameEncoder::m_tld a public member for framefilter to
 access, and use it when WPP is disabled.


 On Wed, Jul 2, 2014 at 2:25 PM, Deepthi Nandakumar 
 deep...@multicorewareinc.com wrote:

 The framefilter structure needs ThreadLocalData m_tld, that has to be
 initialised, and then used if wpp is not enabled. Not sure what you're
 trying to do here?


 On Wed, Jul 2, 2014 at 2:20 PM, Aarthi Priya Thirumalai 
 aar...@multicorewareinc.com wrote:

  Sorry, the Thg client didn't sync the changes properly for the previous
 commit. Here is the right one:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1404290172 -19800
 #  Wed Jul 02 14:06:12 2014 +0530
 # Node ID a5a439242bbf367f5d76356b841cfa1ee9e119e4
 # Parent  a18972fd05b1d6242a881bef979b9e1ff17543d9
 framefilter: remove heap corruption in tld

 diff -r a18972fd05b1 -r a5a439242bbf source/encoder/frameencoder.h
 --- a/source/encoder/frameencoder.h Tue Jul 01 14:58:35 2014 -0500
 +++ b/source/encoder/frameencoder.h Wed Jul 02 14:06:12 2014 +0530
 @@ -171,7 +171,7 @@
  uint32_t m_checksum[3];
  double   m_elapsedCompressTime; // elapsed time
 spent in worker threads
  double   m_frameTime;   // wall time from
 frame start to finish
 -
 +ThreadLocalData  m_tld;
  volatile boolm_bAllRowsStop;
  volatile int m_vbvResetTriggerRow;

 @@ -191,7 +191,6 @@
   Bitstream*   m_outStreams;
  NoiseReduction   m_nr;
  NALList  m_nalList;
 -ThreadLocalData  m_tld;

  Frame*   m_frame;

 diff -r a18972fd05b1 -r a5a439242bbf source/encoder/framefilter.cpp
 --- a/source/encoder/framefilter.cpp Tue Jul 01 14:58:35 2014 -0500
 +++ b/source/encoder/framefilter.cpp Wed Jul 02 14:06:12 2014 +0530
 @@ -124,8 +124,7 @@
  void FrameFilter::processRow(int row, const int threadId)
  {
  PPAScopeEvent(Thread_filterCU);
 -assert(threadId = 0);
 -ThreadLocalData tld = Encoder::m_threadLocalData[threadId];
 +ThreadLocalData tld = threadId = 0 ?
 Encoder::m_threadLocalData[threadId] : m_frame-m_tld;

  if (!m_param-bEnableLoopFilter  !m_param-bEnableSAO)
  {


 On Wed, Jul 2, 2014 at 2:15 PM, aar...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1404290172 -19800
 #  Wed Jul 02 14:06:12 2014 +0530
 # Node ID 69d9bd3eb5bd015d2e0c90d51eec0d7f8a4747d0
 # Parent  a18972fd05b1d6242a881bef979b9e1ff17543d9
 framefilter: remove heap corruption in tld

 diff -r a18972fd05b1 -r 69d9bd3eb5bd source/encoder/frameencoder.h
 --- a/source/encoder/frameencoder.h Tue Jul 01 14:58:35 2014 -0500
 +++ b/source/encoder/frameencoder.h Wed Jul 02 14:06:12 2014 +0530
 @@ -191,7 +191,6 @@
  Bitstream*   m_outStreams;
  NoiseReduction   m_nr;
  NALList  m_nalList;
 -ThreadLocalData  m_tld;

  Frame*   m_frame;



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] A compiler warning and a question

2014-07-09 Thread Deepthi Nandakumar
Thanks, that was a bug, fixed.


On Wed, Jul 9, 2014 at 1:34 PM, Mario *LigH* Rohkrämer cont...@ligh.de
wrote:

 v1.1+260-a1e46d813642

 +
 h:/MSYS/home/Entwicklung/x265/source/Lib/TLibEncoder/TEncCu.cpp:1192:6:
 warning: unused parameter 'outBestPredYuv' [-Wunused-parameter]
  void TEncCu::xCheckRDCostMerge2Nx2N(TComDataCU* outBestCU, TComDataCU*
 outTempCU, bool *earlyDetectionSkipMode, TComYuv* outBestPredYuv,
 TComYuv* rpcYuvReconBest)
   ^
 h:/MSYS/home/Entwicklung/x265/source/Lib/TLibEncoder/TEncCu.cpp:1192:6:
 warning: unused parameter 'rpcYuvReconBest' [-Wunused-parameter]
 +



 I made a trivial shell script to compile both architectures (32 + 64
 bit) and both precisions (8 + 16 bpc) in sequence. I noticed that the
 compiling order can be different for different runs. Sometimes target
 "common" is handled first, sometimes "encoder". Probably nothing to worry
 about; I'm just curious why it can be random.

 --

 Fun and success!
 Mario *LigH* Rohkrämer
 mailto:cont...@ligh.de

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] use std::swap() for readability

2014-07-09 Thread Deepthi Nandakumar
Hello,

We spent a bunch of effort last year to remove STL dependencies, since they
cause serious trouble between different compilers (even between different
compiler versions).  This matters especially because a lot of users will use
x265 as a static library.

Thanks,
Deepthi


On Wed, Jul 9, 2014 at 3:00 PM, Satoshi Nakagawa nakagawa...@oki.com
wrote:

 # HG changeset patch
 # User Satoshi Nakagawa nakagawa...@oki.com
 # Date 1404898046 -32400
 #  Wed Jul 09 18:27:26 2014 +0900
 # Node ID a3f4317f4acd89b7ef9bb8616068f9e4ff24328c
 # Parent  644773b8532929a30f910fd269f521e44621f2f7
 use std::swap() for readability

 diff -r 644773b85329 -r a3f4317f4acd
 source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp
 --- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cppWed Jul 09
 13:55:42 2014 +0530
 +++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cppWed Jul 09
 18:27:26 2014 +0900
 @@ -535,12 +535,10 @@
  int isChroma = (yCbCr != 0) ? 1 : 0;
  int shift;
  int cuHeightTmp;
 -pixel* tmpLSwap;
  pixel* tmpL;
  pixel* tmpU;
  pixel* clipTbl = NULL;
  int32_t *offsetBo = NULL;
 -int32_t *tmp_swap;

  picWidthTmp  = (isChroma == 0) ? m_picWidth  : m_picWidth  
 m_hChromaShift;
  picHeightTmp = (isChroma == 0) ? m_picHeight : m_picHeight 
 m_vChromaShift;
 @@ -707,9 +705,7 @@

  m_upBufft[startX] = signDown2;

 -tmp_swap  = m_upBuff1;
 -m_upBuff1 = m_upBufft;
 -m_upBufft = tmp_swap;
 +std::swap(m_upBuff1, m_upBufft);

  rec += stride;
  }
 @@ -775,9 +771,7 @@

  //   if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
  {
 -tmpLSwap = m_tmpL1;
 -m_tmpL1  = m_tmpL2;
 -m_tmpL2  = tmpLSwap;
 +std::swap(m_tmpL1, m_tmpL2);
  }
  }

 @@ -864,7 +858,6 @@
  int frameWidthInCU = m_pic-getFrameWidthInCU();
  int frameHeightInCU = m_pic-getFrameHeightInCU();
  int stride;
 -pixel *tmpUSwap;
  int sChroma = (yCbCr == 0) ? 0 : 1;
  bool mergeLeftFlag;
  int saoBitIncrease = (yCbCr == 0) ? m_saoBitIncreaseY :
 m_saoBitIncreaseC;
 @@ -976,9 +969,7 @@
  }
  }

 -tmpUSwap   = m_tmpU1[yCbCr];
 -m_tmpU1[yCbCr] = m_tmpU2[yCbCr];
 -m_tmpU2[yCbCr] = tmpUSwap;
 +std::swap(m_tmpU1[yCbCr], m_tmpU2[yCbCr]);
  }
  }

 @@ -1018,7 +1009,6 @@
  int addr;
  int frameWidthInCU = m_pic-getFrameWidthInCU();
  int stride;
 -pixel *tmpUSwap;
  int sChroma = (yCbCr == 0) ? 0 : 1;
  bool mergeLeftFlag;
  int saoBitIncrease = (yCbCr == 0) ? m_saoBitIncreaseY :
 m_saoBitIncreaseC;
 @@ -1122,9 +1112,7 @@
  }
  }

 -tmpUSwap   = m_tmpU1[yCbCr];
 -m_tmpU1[yCbCr] = m_tmpU2[yCbCr];
 -m_tmpU2[yCbCr] = tmpUSwap;
 +std::swap(m_tmpU1[yCbCr], m_tmpU2[yCbCr]);
  }
  }

 diff -r 644773b85329 -r a3f4317f4acd source/Lib/TLibEncoder/TEncCu.cpp
 --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Jul 09 13:55:42 2014 +0530
 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Wed Jul 09 18:27:26 2014 +0900
 @@ -1258,22 +1258,11 @@
  uint64_t bestCost = m_rdCost-psyRdEnabled() ?
 outBestCU-m_totalPsyCost : outBestCU-m_totalRDCost;
  if (tempCost  bestCost)
  {
 -TComDataCU* tmp = outTempCU;
 -outTempCU = outBestCU;
 -outBestCU = tmp;
 -
 -// Change Prediction data
 -TComYuv* yuv = NULL;
 -yuv = outBestPredYuv;
 -outBestPredYuv = m_tmpPredYuv[depth];
 -m_tmpPredYuv[depth] = yuv;
 -
 -yuv = rpcYuvReconBest;
 -rpcYuvReconBest = m_tmpRecoYuv[depth];
 -m_tmpRecoYuv[depth] = yuv;
 -
 +std::swap(outBestCU, outTempCU);
 +std::swap(outBestPredYuv, m_tmpPredYuv[depth]);
 +std::swap(rpcYuvReconBest, m_tmpRecoYuv[depth]);

  
 m_rdSbacCoders[depth][CI_TEMP_BEST]-store(m_rdSbacCoders[depth][CI_NEXT_BEST]);
 -}
 +}
  outTempCU-setQPSubParts(origQP, 0, depth);
  outTempCU-setSkipFlagSubParts(false, 0, depth);
  if (!bestIsSkip)
 @@ -1446,21 +1435,14 @@

  if (tempCost  bestCost)
  {
 -TComYuv* yuv;
  // Change Information data
 -TComDataCU* cu = outBestCU;
 -outBestCU = outTempCU;
 -outTempCU = cu;
 +std::swap(outBestCU, outTempCU);

  // Change Prediction data
 -yuv = m_bestPredYuv[depth];
 -m_bestPredYuv[depth] = m_tmpPredYuv[depth];
 -m_tmpPredYuv[depth] = yuv;
 +std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]);

  // Change Reconstruction data
 -yuv = 

Re: [x265] [PATCH 1 of 4] cu, search: remove redundant m_rdGoOnSbacCoder pointer

2014-07-10 Thread Deepthi Nandakumar
This series is awesome...


On Fri, Jul 11, 2014 at 6:00 AM, Steve Borho st...@borho.org wrote:

 # HG changeset patch
 # User Steve Borho st...@borho.org
 # Date 1405031594 18000
 #  Thu Jul 10 17:33:14 2014 -0500
 # Node ID d6c423c66e4d5a06dc4dccbd9eade7a21f9c8adf
 # Parent  57a4c1c2274ec6b40f4a138523c6b67ffa853e09
 cu, search: remove redundant m_rdGoOnSbacCoder pointer

 m_rdGoOnSbacCoder was always either pointing to the same SBac as
 m_sbacCoder
 (for calls to compressCU()) or it was supposed to be unused (for calls to
 encodeCU)

 diff -r 57a4c1c2274e -r d6c423c66e4d source/Lib/TLibEncoder/TEncCu.cpp
 --- a/source/Lib/TLibEncoder/TEncCu.cpp Thu Jul 10 14:13:30 2014 -0500
 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Thu Jul 10 17:33:14 2014 -0500
 @@ -75,7 +75,6 @@
  m_rdCost  = NULL;
  m_sbacCoder   = NULL;
  m_rdSbacCoders= NULL;
 -m_rdGoOnSbacCoder = NULL;
  m_bBitCounting= false;
  }

 @@ -1348,7 +1347,7 @@
  // Encode Coefficients
  bool bEncodeDQP = m_bEncodeDQP;
  m_sbacCoder-codeCoeff(outTempCU, 0, depth, outTempCU-getCUSize(0),
 bEncodeDQP);
 -m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
 +m_sbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
  outTempCU-m_totalBits = m_sbacCoder-getNumberOfWrittenBits();
  outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits;

 @@ -1397,7 +1396,7 @@
  // Encode Coefficients
  bool bCodeDQP = m_bEncodeDQP;
  m_sbacCoder-codeCoeff(outTempCU, 0, depth, outTempCU-getCUSize(0),
 bCodeDQP);
 -m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
 +m_sbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
  outTempCU-m_totalBits = m_sbacCoder-getNumberOfWrittenBits();
  outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits;

 diff -r 57a4c1c2274e -r d6c423c66e4d source/Lib/TLibEncoder/TEncCu.h
 --- a/source/Lib/TLibEncoder/TEncCu.h   Thu Jul 10 14:13:30 2014 -0500
 +++ b/source/Lib/TLibEncoder/TEncCu.h   Thu Jul 10 17:33:14 2014 -0500
 @@ -124,7 +124,6 @@

  // RD SBac pointers
  SBac   (*m_rdSbacCoders)[CI_NUM];
 -SBac*m_rdGoOnSbacCoder;

  uint8_t  m_totalDepth;

 diff -r 57a4c1c2274e -r d6c423c66e4d source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jul 10 14:13:30 2014
 -0500
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jul 10 17:33:14 2014
 -0500
 @@ -68,7 +68,6 @@
  m_trQuant = NULL;
  m_sbacCoder = NULL;
  m_rdSbacCoders = NULL;
 -m_rdGoOnSbacCoder = NULL;
  m_numLayers = 0;
  }

 @@ -636,7 +635,7 @@
  if (checkTransformSkip || checkTQbypass)
  {
  //- store original entropy coding status -
 -
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
 +
  m_sbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);

  uint32_t  singleDistYTmp = 0;
  uint32_t  singlePsyEnergyYTmp = 0;
 @@ -694,10 +693,10 @@
  bestTQbypass = singleTQbypass;
  bestModeId   = modeId;
  if (bestModeId == firstCheckId)
 -
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
 +
  m_sbacCoder-store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
  }
  if (modeId == firstCheckId)
 -
  m_rdGoOnSbacCoder-load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
 +
  m_sbacCoder-load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
  }

  cu-setTransformSkipSubParts(checkTransformSkip ? bestModeId
 : 0, TEXT_LUMA, absPartIdx, fullDepth);
 @@ -708,7 +707,7 @@
  {
  xLoadIntraResultQT(cu, absPartIdx, log2TrSize, reconQt,
 reconQtStride);
  cu-setCbfSubParts(singleCbfY  trDepth, TEXT_LUMA,
 absPartIdx, fullDepth);
 -
  m_rdGoOnSbacCoder-load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
 +
  m_sbacCoder-load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
  }
  else
  {
 @@ -719,7 +718,7 @@
  }
  else
  {
 -
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
 +
  m_sbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);

  //- code luma block with given intra prediction mode and
 store Cbf-
  cu-setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx,
 fullDepth);
 @@ -748,11 +747,11 @@
  //- store full entropy coding status, load original entropy
 coding status -
  if (bCheckFull)
  {
 -
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_TEST]);
 -
  m_rdGoOnSbacCoder-load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
 +
  m_sbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_TEST]);
 +
  m_sbacCoder-load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
  }
  else
 -
  m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
 +
  

Re: [x265] Many more warnings by GCC 4.8.2

2014-07-20 Thread Deepthi Nandakumar
Thanks, Mario. Except for the first one, most are harmless. But, we'll be
fixing those right away.


On Mon, Jul 21, 2014 at 9:59 AM, Mario Rohkrämer cont...@ligh.de wrote:

 No panic; I know that many reasons for warnings are less than serious.
 Just reporting.

 __


 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp: In
 member function 'void x265::TComWeightPrediction::
 getWpScaling(x265::TComDataCU*, int, int, x265::WeightParam*,
 x265::WeightParam*)':
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:518:62:
 warning: suggest braces around empty body in an 'else' statement
 [-Wempty-body]
  X265_CHECK(0, unexpected wpScaling configuration\n);
   ^
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp: In
 member function 'void x265::TComWeightPrediction::
 xWeightedPredictionBi(x265::TComDataCU*, x265::TComYuv*, x265::TComYuv*,
 int, int, uint32_t, int, int, x265::TComYuv*, bool, bool)':
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:537:45:
 warning: 'pwp1' may be used uninitialized in this function
 [-Wmaybe-uninitialized]
  wp1[yuv].round  = wp0[yuv].round;
  ^
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:567:26:
 note: 'pwp1' was declared here
  WeightParam  *pwp0, *pwp1;
   ^
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:530:51:
 warning: 'pwp0' may be used uninitialized in this function
 [-Wmaybe-uninitialized]
  wp0[yuv].w  = wp0[yuv].inputWeight;
^
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:567:19:
 note: 'pwp0' was declared here
  WeightParam  *pwp0, *pwp1;
^
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp: In
 member function 'void x265::TComWeightPrediction::
 xWeightedPredictionBi(x265::TComDataCU*, x265::ShortYuv*,
 x265::ShortYuv*, int, int, uint32_t, int, int, x265::TComYuv*, bool, bool)':
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:537:45:
 warning: 'pwp1' may be used uninitialized in this function
 [-Wmaybe-uninitialized]
  wp1[yuv].round  = wp0[yuv].round;
  ^
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:603:26:
 note: 'pwp1' was declared here
  WeightParam  *pwp0, *pwp1;
   ^
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:530:51:
 warning: 'pwp0' may be used uninitialized in this function
 [-Wmaybe-uninitialized]
  wp0[yuv].w  = wp0[yuv].inputWeight;
^
 h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:603:19:
 note: 'pwp0' was declared here
  WeightParam  *pwp0, *pwp1;
^
 __


 h:/MSYS/home/LigH/x265/source/encoder/level.cpp: In function 'void
 x265::determineLevel(const x265_param, x265::Profile::Name,
 x265::Level::Name, x265::Level::Tier)':
 h:/MSYS/home/LigH/x265/source/encoder/level.cpp:143:24: warning: array
 subscript is above array bounds [-Warray-bounds]
  while (levels[i].levelIdc  param.levelIdc  levels[i].levelIdc)
 ^
 h:/MSYS/home/LigH/x265/source/encoder/level.cpp:143:24: warning: array
 subscript is above array bounds [-Warray-bounds]

 __


 h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp: In member function
 'bool x265::RateControl::initPass2()':
 h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning:
 'minVal' may be used uninitialized in this function [-Wmaybe-uninitialized]
  bool RateControl::initPass2()
   ^
 h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning:
 'maxVal' may be used uninitialized in this function [-Wmaybe-uninitialized]
 h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning: 'a'
 may be used uninitialized in this function [-Wmaybe-uninitialized]
 h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning:
 'minVal' may be used uninitialized in this function [-Wmaybe-uninitialized]
 h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning:
 'maxVal' may be used uninitialized in this function [-Wmaybe-uninitialized]
 h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning: 'a'
 may be used uninitialized in this function [-Wmaybe-uninitialized]

 --

 Fun and success!
 Mario *LigH* Rohkrämer
 mailto:cont...@ligh.de
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] Custom LowRes scale

2014-07-21 Thread Deepthi Nandakumar
Thanks, this is certainly an enhancement to x265 lookahead. We would be
interested in this - especially if you can also include some efficiency
(bitrate vs SSIM) metrics that describe the penalty moving from
X265_LOWRES_SCALE of 4 to higher scales.


On Mon, Jul 21, 2014 at 8:49 PM, Nicolas Morey-Chaisemartin 
nmo...@kalray.eu wrote:

 Hi,

 We recently profiled x265 pre-analysis to estimate what performance we
 could reach using our accelerator and I was quite disappointed by the
 performance.
 When running on a Core-i7 with AVX at roughly 2.7GHz, we barely reached
 the 30fps mark using ultrafast preset on a 4K video.



 After a little bit of browsing I realized that work in LowRes is always
 done at 1/4th of the final resolution, which seems fair but requires a huge
 amount of work for 4K.
 It seemed straightforward enough to change the divider at LowRes
 initialization, but it seems there are a lot of hard-coded values that
 depend both on the LowRes divider and the LowRes CU size.

 Here's a patch (definitely not applicable like this, but just to give an
 idea of where I'm going) that seems to fix most of the hard-coded values.
 It still works with an X265_LOWRES_SCALE of 4 and the perf is definitely
 improving (29fps => 40fps on a 2048x1024 medium preset on a E5504).

 Would you be interested in a clean version of this? At least the
 hard-coded CU_SIZE part?
 IMHO it would be better to have dynamic value for LowRes depending on
 preset (or equivalent) and the input resolution...
 1/4th is fast enough in HD not to be an issue but for RT stream in 4K or
 more, 1/16 will be compulsory.

 Nicolas

 ---
  x265/source/common/common.h  |  1 +
  x265/source/common/lowres.cpp|  4 ++--
  x265/source/encoder/frameencoder.cpp |  7 ---
  x265/source/encoder/ratecontrol.cpp  | 16 
  x265/source/encoder/slicetype.cpp|  8 
  5 files changed, 19 insertions(+), 17 deletions(-)

 diff --git a/x265/source/common/common.h b/x265/source/common/common.h
 index 06f60e7..00e73fc 100644
 --- a/x265/source/common/common.h
 +++ b/x265/source/common/common.h
 @@ -156,6 +156,7 @@ typedef int32_t  coeff_t;  // transform coefficient
  // high cost estimates (intra and inter both suffer)
  #define X265_LOWRES_CU_SIZE   8
  #define X265_LOWRES_CU_BITS   3
 +#define X265_LOWRES_SCALE 2
   #define X265_MALLOC(type, count)(type*)x265_malloc(sizeof(type) *
 (count))
  #define X265_FREE(ptr)  x265_free(ptr)
 diff --git a/x265/source/common/lowres.cpp b/x265/source/common/lowres.cpp
 index 5fc2f6b..6138023 100644
 --- a/x265/source/common/lowres.cpp
 +++ b/x265/source/common/lowres.cpp
 @@ -31,8 +31,8 @@ bool Lowres::create(TComPicYuv *orig, int _bframes, bool
 bAQEnabled)
  {
  isLowres = true;
  bframes = _bframes;
 -width = orig-getWidth() / 2;
 -lines = orig-getHeight() / 2;
 +width = orig-getWidth() / X265_LOWRES_SCALE;
 +lines = orig-getHeight() / X265_LOWRES_SCALE;
  lumaStride = width + 2 * orig-getLumaMarginX();
  if (lumaStride  31)
  lumaStride += 32 - (lumaStride  31);
 diff --git a/x265/source/encoder/frameencoder.cpp b/x265/source/encoder/
 frameencoder.cpp
 index 8c3ee26..7213f60 100644
 --- a/x265/source/encoder/frameencoder.cpp
 +++ b/x265/source/encoder/frameencoder.cpp
 @@ -1300,9 +1300,10 @@ int FrameEncoder::calcQpForCu(uint32_t cuAddr,
 double baseQp)
   /* Derive qpOffet for each CU by averaging offsets for all 16x16
 blocks in the cu. */
  double qp_offset = 0;
 -int maxBlockCols = (m_frame-getPicYuvOrg()-getWidth() + (16 - 1))
 / 16;
 -int maxBlockRows = (m_frame-getPicYuvOrg()-getHeight() + (16 - 1))
 / 16;
 -int noOfBlocks = g_maxCUSize / 16;
 +int lowResCu = (X265_LOWRES_CU_SIZE * X265_LOWRES_SCALE);
 +int maxBlockCols = (m_frame-getPicYuvOrg()-getWidth() + (lowResCu
 - 1)) / lowResCu;
 +int maxBlockRows = (m_frame-getPicYuvOrg()-getHeight() + (lowResCu
 - 1)) / lowResCu;
 +int noOfBlocks = g_maxCUSize / lowResCu;
  int block_y = (cuAddr / m_frame-getPicSym()-getFrameWidthInCU()) *
 noOfBlocks;
  int block_x = (cuAddr * noOfBlocks) - block_y * m_frame-getPicSym()-
 getFrameWidthInCU();
  diff --git a/x265/source/encoder/ratecontrol.cpp b/x265/source/encoder/
 ratecontrol.cpp
 index 4358994..5fcc27a 100644
 --- a/x265/source/encoder/ratecontrol.cpp
 +++ b/x265/source/encoder/ratecontrol.cpp
 @@ -161,8 +161,8 @@ void RateControl::calcAdaptiveQuantFrame(Frame *pic)
  if (m_param-rc.aqMode == X265_AQ_NONE || m_param-rc.aqStrength == 0)
  {
  /* Need to init it anyways for CU tree */
 -int cuWidth = ((maxCol / 2) + X265_LOWRES_CU_SIZE - 1) 
 X265_LOWRES_CU_BITS;
 -int cuHeight = ((maxRow / 2) + X265_LOWRES_CU_SIZE - 1) 
 X265_LOWRES_CU_BITS;
 +int cuWidth = ((maxCol / X265_LOWRES_SCALE) + X265_LOWRES_CU_SIZE
 - 1)  X265_LOWRES_CU_BITS;
 +int cuHeight = ((maxRow / X265_LOWRES_SCALE) +
 X265_LOWRES_CU_SIZE - 1)  

Re: [x265] [PATCH] psyrdoq: implementation of psyrdoq

2014-07-22 Thread Deepthi Nandakumar
Does it make sense to try this for DC coefficients?


On Tue, Jul 22, 2014 at 9:24 PM, Steve Borho st...@borho.org wrote:

 On 07/22, sumala...@multicorewareinc.com wrote:
  # HG changeset patch
  # User Sumalatha Polureddysumala...@multicorewareinc.com
  # Date 1406032149 -19800
  # Node ID 37e03dcd2e4f0b5894880ff8c097bd6e11590459
  # Parent  d303b4d860e9f06396a156726dd518d0f41fe796
  psyrdoq: implementation of psyrdoq
 
  diff -r d303b4d860e9 -r 37e03dcd2e4f
 source/Lib/TLibCommon/TComTrQuant.cpp
  --- a/source/Lib/TLibCommon/TComTrQuant.cpp   Mon Jul 21 22:43:38 2014
 -0500
  +++ b/source/Lib/TLibCommon/TComTrQuant.cpp   Tue Jul 22 17:59:09 2014
 +0530
  @@ -64,6 +64,8 @@
   return y + ((x - y)  ((x - y)  (sizeof(int) * CHAR_BIT - 1)));
 // min(x, y)
   }
 
  +#define SIGN(x,y) ((x^(y  31))-(y  31))
  +
   //
 
   // TComTrQuant class member functions
   //
 
  @@ -307,6 +309,8 @@
   }
 
   uint32_t TComTrQuant::transformNxN(TComDataCU* cu,
  +   pixel*  fenc,
  +   uint32_tfencStride,
  int16_t*residual,
  uint32_tstride,
  coeff_t*coeff,
  @@ -316,10 +320,10 @@
  booluseTransformSkip,
  boolcurUseRDOQ)
   {
  +int trSize = 1  log2TrSize;
   if (cu-getCUTransquantBypass(absPartIdx))
   {
   uint32_t numSig = 0;
  -int trSize = 1  log2TrSize;
   for (int k = 0; k  trSize; k++)
   {
   for (int j = 0; j  trSize; j++)
  @@ -339,6 +343,12 @@
   const uint32_t sizeIdx = log2TrSize - 2;
   int useDST = (sizeIdx == 0  ttype == TEXT_LUMA 
 cu-getPredictionMode(absPartIdx) == MODE_INTRA);
   int index = DCT_4x4 + sizeIdx - useDST;
  +if (psyRdoqEnabled())
  +{
  +// converting pixel to int and putting in separate buffer
 to take dct
  +primitives.square_copy_ps[sizeIdx](m_tmpfencBuf,
 MAX_CU_SIZE, fenc, fencStride);
  +primitives.dct[index](m_tmpfencBuf, m_tmpfencCoeff, stride);
  +}
   primitives.dct[index](residual, m_tmpCoeff, stride);
   if (m_nr-bNoiseReduction)
   {
  @@ -356,7 +366,7 @@
 
   if (m_useRDOQ  curUseRDOQ)
   {
  -return xRateDistOptQuant(cu, m_tmpCoeff, coeff, log2TrSize,
 ttype, absPartIdx);
  +return xRateDistOptQuant(cu, m_tmpfencCoeff, m_tmpCoeff, coeff,
 log2TrSize, ttype, absPartIdx);
   }
   return xQuant(cu, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx);
   }
  @@ -505,7 +515,7 @@
* Rate distortion optimized quantization for entropy
* coding engines using probability models like CABAC
*/
  -uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t*
 srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize,
  +uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t*
 fencCoeff, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize,
   TextType ttype, uint32_t
 absPartIdx)
   {
   uint32_t trSize = 1  log2TrSize;
  @@ -614,7 +624,7 @@
   {
   level = xGetCodedLevel(costCoeff[scanPos],
 curCostSig, costSig[scanPos],
  levelDouble, maxAbsLevel,
 baseLevel, greaterOneBits, levelAbsBits, goRiceParam,
  -   c1c2Idx, qbits, scaleFactor,
 1);
  +   c1c2Idx, qbits, scaleFactor,
 1, srcCoeff[blkPos], fencCoeff[blkPos]);
   sigRateDelta[blkPos] = 0;
   }
   else
  @@ -631,7 +641,7 @@
   curCostSig = xGetRateSigCoef(1, ctxSig);
   level = xGetCodedLevel(costCoeff[scanPos],
 curCostSig, costSig[scanPos],
  levelDouble,
 maxAbsLevel, baseLevel, greaterOneBits, levelAbsBits, goRiceParam,
  -   c1c2Idx, qbits,
 scaleFactor, 0);
  +   c1c2Idx, qbits,
 scaleFactor, 0, srcCoeff[blkPos], fencCoeff[blkPos]);
   }
   else
   {
  @@ -1126,7 +1136,9 @@
   uint32_t c1c2Idx,
   int  qbits,
   double   scaleFactor,
  -bool last) const
  +bool last,
 

Re: [x265] [PATCH] psyrdoq: implementation of psyrdoq

2014-07-22 Thread Deepthi Nandakumar
OK, that's what I thought too.
On Jul 23, 2014 8:55 AM, Steve Borho st...@borho.org wrote:

 On 07/23, Deepthi Nandakumar wrote:
  Does it make sense to try this for DC coefficients?

 my understanding is that it is not helpful, and possibly harmful.

 we don't want to bias the DC coefficient in any way.

 snipped

 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Added fast intra search option

2014-08-13 Thread Deepthi Nandakumar
There are a couple of warnings our regression tests caught with this. Can
you take a look?

source\encoder\predict.cpp(78): warning C4800: 'const unsigned char' :
forcing value to bool 'true' or 'false' (performance warning)
(IntraFilterType can be bool, I think?).


C:\users\deepthi\code\x265\source\encoder\slicetype.cpp(1714): warning
C4701: potentially uninitialized local variable 'lowmode' used

Thanks,
Deepthi



On Wed, Aug 13, 2014 at 4:07 AM, dtyx...@gmail.com wrote:

 # HG changeset patch
 # User David T Yuen dtyx...@gmail.com
 # Date 1407882999 25200
 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5
 # Parent  8a7f4bb1d1be32fe668d410450c2e320ccae6098
 Added fast intra search option

 This version calls intra_pred_allangs  to create the predictions then the
 faster search with satd

 diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp
 --- a/source/common/param.cpp   Tue Aug 12 01:11:39 2014 -0500
 +++ b/source/common/param.cpp   Tue Aug 12 15:36:39 2014 -0700
 @@ -132,6 +132,7 @@
  /* Intra Coding Tools */
  param-bEnableConstrainedIntra = 0;
  param-bEnableStrongIntraSmoothing = 1;
 +param-bEnableFastIntra = 0;

  /* Inter Coding tools */
  param-searchMethod = X265_HEX_SEARCH;
 @@ -560,6 +561,7 @@
  OPT(lossless) p-bLossless = atobool(value);
  OPT(cu-lossless) p-bCULossless = atobool(value);
  OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value);
 +OPT(fast-intra) p-bEnableFastIntra = atobool(value);
  OPT(open-gop) p-bOpenGOP = atobool(value);
  OPT(scenecut)
  {
 @@ -1211,6 +1213,7 @@
  BOOL(p-bLossless, lossless);
  BOOL(p-bCULossless, cu-lossless);
  BOOL(p-bEnableConstrainedIntra, constrained-intra);
 +BOOL(p-bEnableFastIntra, fast-intra);
  BOOL(p-bOpenGOP, open-gop);
  s += sprintf(s,  interlace=%d, p-interlaceMode);
  s += sprintf(s,  keyint=%d, p-keyframeMax);
 diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp
 --- a/source/encoder/slicetype.cpp  Tue Aug 12 01:11:39 2014 -0500
 +++ b/source/encoder/slicetype.cpp  Tue Aug 12 15:36:39 2014 -0700
 @@ -1242,6 +1242,7 @@
  {
  m_rows[i].m_widthInCU = m_widthInCU;
  m_rows[i].m_heightInCU = m_heightInCU;
 +m_rows[i].m_param = m_param;
  }

  if (!WaveFront::init(m_heightInCU))
 @@ -1676,26 +1677,86 @@

  int predsize = cuSize * cuSize;

 -// generate 35 intra predictions into tmp
 +// generate 35 intra predictions into m_predictions
 +pixelcmp_t satd =
 primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
 +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost =
 m_me.COST_MAX;
 +uint32_t  lowmode, mode;
  primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize,
 left0, above0, 0, (cuSize = 16));
 +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
 +if (cost  icost)
 +icost = cost;
  pixel *above = (cuSize = 8) ? above1 : above0;
  pixel *left  = (cuSize = 8) ? left1 : left0;
 -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions +
 predsize, cuSize, left, above, 0, 0);
 +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize,
 left, above, 0, 0);
 +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
 +if (cost  icost)
 +icost = cost;
  primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 *
 predsize, above0, left0, above1, left1, (cuSize = 16));

 -// calculate 35 satd costs, keep least cost
 +// calculate satd costs, keep least cost
  ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
  primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE);
 -pixelcmp_t satd =
 primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
 -int icost = m_me.COST_MAX, cost;
 -for (uint32_t mode = 0; mode  35; mode++)
 +// fast-intra angle search
 +if (m_param-bEnableFastIntra)
  {
 -if ((mode = 2)  (mode  18))
 +for (mode = 4;mode  35; mode += 5)
 +{
 +if (mode  18)
 +cost = satd(buf_trans, cuSize, m_predictions[mode *
 predsize], cuSize);
 +else
 +cost = satd(m_me.fenc, FENC_STRIDE,
 m_predictions[mode * predsize], cuSize);
 +if (cost  acost)
 +{
 +lowmode = mode;
 +acost = cost;
 +}
 +}
 +mode = lowmode - 2;
 +if (mode  18)
 +lowcost = satd(buf_trans, cuSize, m_predictions[mode *
 predsize], cuSize);
 +else
 +lowcost = satd(m_me.fenc, FENC_STRIDE,
 m_predictions[mode * predsize], cuSize);
 +highcost = m_me.COST_MAX;
 +if (lowmode  34)
 +{
 +mode = lowmode + 2;
 +if (mode  18)

Re: [x265] [PATCH] count_nonzero primitive, downscaling quantCoeff from int32_t* to int16_t*

2014-08-14 Thread Deepthi Nandakumar
Praveen,

Can you build a mercurial queue for these quant patches - so they can be
reviewed and pushed in once quant is 16-bit everywhere?

Thanks,
Deepthi


On Thu, Aug 14, 2014 at 2:01 AM, Steve Borho st...@borho.org wrote:

 On 08/12, prav...@multicorewareinc.com wrote:
  # HG changeset patch
  # User Praveen Tiwari
  # Date 1407834530 -19800
  # Node ID bb4d44663964237e4b66af6d92b2f13dbcf4f9b9
  # Parent  8a7f4bb1d1be32fe668d410450c2e320ccae6098
  count_nonzero primitive, downscaling quantCoeff from int32_t* to int16_t*

 There's not much point in applying these patches until all of the quant
 primitives are using short ints for coefficients. As-is this will just
 be a slow-down.

  diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/dct.cpp
  --- a/source/common/dct.cpp   Tue Aug 12 01:11:39 2014 -0500
  +++ b/source/common/dct.cpp   Tue Aug 12 14:38:50 2014 +0530
  @@ -815,7 +815,7 @@
   return numSig;
   }
 
  -int  count_nonzero_c(const int32_t *quantCoeff, int numCoeff)
  +int  count_nonzero_c(const int16_t *quantCoeff, int numCoeff)
   {
   X265_CHECK(((intptr_t)quantCoeff  15) == 0, quant buffer not
 aligned\n);
   X265_CHECK(numCoeff  0  (numCoeff  15) == 0, numCoeff invalid
 %d\n, numCoeff);
  diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/primitives.h
  --- a/source/common/primitives.h  Tue Aug 12 01:11:39 2014 -0500
  +++ b/source/common/primitives.h  Tue Aug 12 14:38:50 2014 +0530
  @@ -163,7 +163,7 @@
   typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff,
 int32_t *qCoef, int qBits, int add, int numCoeff);
   typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t
 *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
   typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t*
 coef, int num, int scale, int shift);
  -typedef int  (*count_nonzero_t)(const int32_t *quantCoeff, int
 numCoeff);
  +typedef int  (*count_nonzero_t)(const int16_t *quantCoeff, int
 numCoeff);
 
   typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t
 srcStride, intptr_t dstStride, int width, int height, int w0, int round,
 int shift, int offset);
   typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t
 srcStride, intptr_t dstStride, int width, int height, int w0, int round,
 int shift, int offset);
  diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/quant.cpp
  --- a/source/common/quant.cpp Tue Aug 12 01:11:39 2014 -0500
  +++ b/source/common/quant.cpp Tue Aug 12 14:38:50 2014 +0530
  @@ -2,6 +2,7 @@
* Copyright (C) 2014 x265 project
*
* Authors: Steve Borho st...@borho.org
  + *  Praveen Kumar Tiwari prav...@multicorewareinc.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
  @@ -463,7 +464,17 @@
   const uint32_t sizeIdx = log2TrSize - 2;
   int useDST = !sizeIdx  ttype == TEXT_LUMA  bIntra;
 
  -X265_CHECK((int)numSig == primitives.count_nonzero(coeff, 1 
 log2TrSize * 2), numSig differ\n);
  +/* This section of code is to safely convert int32_t
 coefficients to int16_t, once the caller function is
  + * optimize to take coefficients as int16_t*, it will be
 cleanse.*/
  +int numCoeff = (1  (log2TrSize * 2));
  +assert(numCoeff = 1024);
  +ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
  +for (int i = 0; i  numCoeff; i++)
  +{
  +qCoeff[i] = (coeff[i]  0x);
  +}
  +
  +X265_CHECK((int)numSig == primitives.count_nonzero(qCoeff, 1 
 log2TrSize * 2), numSig differ\n);
 
   // DC only
   if (numSig == 1  coeff[0] != 0  !useDST)
  @@ -501,7 +512,16 @@
   int numCoeff = 1  log2TrSize * 2;
   uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef,
 dstCoeff, qbits, add, numCoeff);

 These two loops are only here for an X265_CHECK statement that is
 usually compiled out.  All of this code should have been wrapped within
 #if CHECKED_BUILD || _DEBUG

  -X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff,
 numCoeff), numSig differ\n);
  +/* This section of code is to safely convert int32_t coefficients
 to int16_t, once the caller function is
  + * optimize to take coefficients as int16_t*, it will be cleanse.*/
  +assert(numCoeff = 1024);
  +ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
  +for (int i = 0; i  numCoeff; i++)
  +{
  +qCoeff[i] = (dstCoeff[i]  0x);
  +}
  +
  +X265_CHECK((int)numSig == primitives.count_nonzero(qCoeff,
 numCoeff), numSig differ\n);
   if (!numSig)
   return 0;
 
  diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/x86/pixel-util.h
  --- a/source/common/x86/pixel-util.h  Tue Aug 12 01:11:39 2014 -0500
  +++ b/source/common/x86/pixel-util.h  Tue Aug 12 14:38:50 2014 +0530
  @@ -2,6 +2,7 @@
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho st...@borho.org
  + * 

Re: [x265] psycho-visual feature tuning, feature freeze

2014-08-19 Thread Deepthi Nandakumar
I have removed the temporary disabling of rdoq that we had in place when
psy-rd was enabled, since rdoq was not psy-aware at that point.

RDOQ is turned on in rdLevels 4 and above. Psy-rdoq can be turned on if
rdoq is enabled. Psy-rd can be turned on with or without rdoq/psy-rdoq.



On Sat, Aug 16, 2014 at 11:24 PM, Steve Borho st...@borho.org wrote:

 Hello,

 I've pushed some (what I hope to be) final tunings of the two new
 psycho-visual optimization features. The features are still disabled by
 default, but the recommended values are now 1.0 for both.

 http://x265.readthedocs.org/en/stable/cli.html#psycho-visual-options

 Please try them out.

 The stable branch has been merged with default in preparation of a 1.3
 tag, which should happen early this week.

 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] psycho-visual feature tuning, feature freeze

2014-08-19 Thread Deepthi Nandakumar
Our general suggestion would be to use psy-rd and psy-rdoq together for
best visual quality.


On Tue, Aug 19, 2014 at 2:18 PM, Deepthi Nandakumar 
deep...@multicorewareinc.com wrote:

 I have removed the temporary disabling we had in place for rdoq, when
 psy-rd was enabled since rdoq was not psy-aware at that point.

 RDOQ is turned on in rdLevels 4 and above. Psy-rdoq can be turned on if
 rdoq is enabled. Psy-rd can be turned on with or without rdoq/psy-rdoq.



 On Sat, Aug 16, 2014 at 11:24 PM, Steve Borho st...@borho.org wrote:

 Hello,

 I've pushed some (what I hope to be) final tunings of the two new
 psycho-visual optimization features. The features are still disabled by
 default, but the recommended values are now 1.0 for both.

 http://x265.readthedocs.org/en/stable/cli.html#psycho-visual-options

 Please try them out.

 The stable branch has been merged with default in preparation of a 1.3
 tag, which should happen early this week.

 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

2014-08-25 Thread Deepthi Nandakumar
Ashok is already working on pre-calculating these inside-picture flags
along with more refactors. After his refactors are in, we can check whether
padding will improve performance.

In fact, very likely he already has a local version of the logic in this
patch.


On Mon, Aug 25, 2014 at 10:46 PM, Steve Borho st...@borho.org wrote:

 On 08/25, Satoshi Nakagawa wrote:
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com
  # Date 1408956792 -32400
  #  Mon Aug 25 17:53:12 2014 +0900
  # Node ID 7145e57c722a94a06faec33e3041442032a1892f
  # Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
  replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

 Queued for default, thanks.

 There seems to be a lot of logic that checks for 'inside picture
 bounds'. It seems like we could save a lot of CPU cycles if we padded
 input pictures to the max-ctu size instead of the min-ctu size and
 adjusted the conformance window accordingly.

  diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp
  --- a/source/Lib/TLibCommon/TComDataCU.cppFri Aug 22 15:53:34 2014
 -0500
  +++ b/source/Lib/TLibCommon/TComDataCU.cppMon Aug 25 17:53:12 2014
 +0900
  @@ -816,12 +816,12 @@
 
   TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx,
 uint32_t curPartUnitIdx)
   {
  +if ((m_pic-getCU(m_cuAddr)-getCUPelX() +
 g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) =
 m_slice-m_sps-picWidthInLumaSamples)
  +return NULL;
  +
   uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
   uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 
  -if ((m_pic-getCU(m_cuAddr)-getCUPelX() +
 g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) =
 m_slice-m_sps-picWidthInLumaSamples)
  -return NULL;
  -
   if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1,
 numPartInCUSize))
   {
   if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize))
  @@ -857,14 +857,11 @@
 
   TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx,
 uint32_t curPartUnitIdx)
   {
  -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
  +if ((m_pic-getCU(m_cuAddr)-getCUPelY() +
 g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) =
 m_slice-m_sps-picHeightInLumaSamples)
  +return NULL;
 
  -if ((m_pic-getCU(m_cuAddr)-getCUPelY() +
 g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) =
 m_slice-m_sps-picHeightInLumaSamples)
  -{
  -return NULL;
  -}
  -
  -uint32_t numPartInCUSize  = m_pic-getNumPartInCUSize();
  +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
  +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 
   if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1,
 numPartInCUSize))
   {
  @@ -895,15 +892,14 @@
 
   TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx,
 uint32_t curPartUnitIdx, uint32_t partUnitOffset)
   {
  -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
  -
  -if ((m_pic-getCU(m_cuAddr)-getCUPelY() +
 g_rasterToPelY[absPartIdxLB] + (partUnitOffset  LOG2_UNIT_SIZE)) =
  +if ((m_pic-getCU(m_cuAddr)-getCUPelY() +
 g_zscanToPelY[curPartUnitIdx] + (partUnitOffset  LOG2_UNIT_SIZE)) =
   m_slice-m_sps-picHeightInLumaSamples)
   {
   return NULL;
   }
 
  -uint32_t numPartInCUSize  = m_pic-getNumPartInCUSize();
  +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
  +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 
   if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize -
 partUnitOffset, numPartInCUSize))
   {
  @@ -938,14 +934,13 @@
 
   TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx,
 uint32_t curPartUnitIdx, uint32_t partUnitOffset)
   {
  -uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
  -
  -if ((m_pic-getCU(m_cuAddr)-getCUPelX() +
 g_rasterToPelX[absPartIdxRT] + (partUnitOffset  LOG2_UNIT_SIZE)) =
  +if ((m_pic-getCU(m_cuAddr)-getCUPelX() +
 g_zscanToPelX[curPartUnitIdx] + (partUnitOffset  LOG2_UNIT_SIZE)) =
   m_slice-m_sps-picWidthInLumaSamples)
   {
   return NULL;
   }
 
  +uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
   uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 
   if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize -
 partUnitOffset, numPartInCUSize))
  @@ -954,7 +949,7 @@
   {
   if (curPartUnitIdx  g_rasterToZscan[absPartIdxRT -
 numPartInCUSize + partUnitOffset])
   {
  -uint32_t absZorderCUIdx  =
 g_zscanToRaster[m_absIdxInLCU] + (1  (m_log2CUSize[0] - LOG2_UNIT_SIZE))
 - 1;
  +uint32_t absZorderCUIdx =
 g_zscanToRaster[m_absIdxInLCU] + (1  (m_log2CUSize[0] - LOG2_UNIT_SIZE))
 - 1;
   arPartUnitIdx = g_rasterToZscan[absPartIdxRT -
 numPartInCUSize + partUnitOffset];
   if (RasterAddress::isEqualRowOrCol(absPartIdxRT,
 

Re: [x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless

2014-08-26 Thread Deepthi Nandakumar
Thanks, Min. This is a solution, but the extra TComDataCU* will affect
performance. I have sent another patch where I'm just re-encoding the CU if
lossless is chosen as the best mode. This will not affect normal analysis.
Can you review that?

Deepthi


On Tue, Aug 26, 2014 at 3:47 AM, Steve Borho st...@borho.org wrote:

 # HG changeset patch
 # User Min Chen chenm...@163.com
 # Date 1409002891 18000
 #  Mon Aug 25 16:41:31 2014 -0500
 # Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264
 # Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
 analysis: fix inter hash mistake with --cu-lossless

 diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014
 +0900
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014
 -0500
 @@ -2293,7 +2293,7 @@
   * \returns void
   */
  void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv*
 fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv,
 -   ShortYuv* outBestResiYuv,
 TComYuv* outReconYuv)
 +   ShortYuv* outBestResiYuv,
 TComYuv* outReconYuv, TComDataCU* tmpCu)
  {
  X265_CHECK(!cu-isIntra(0), intra CU not expected\n);

 @@ -2321,6 +2321,7 @@
  }

  uint64_t bestCost = MAX_INT64;
 +bool bestTransquantBypassFlag = bIsTQBypassEnable;

  for (uint32_t modeId = 0; modeId  numModes; modeId++)
  {
 @@ -2388,15 +2389,29 @@
  if (cu-getQtRootCbf(0))
  xSetResidualQTData(cu, 0, outBestResiYuv, depth, true);

 +bestTransquantBypassFlag = bIsLosslessMode;
  bestBits = bits;
  bestCost = cost;
  bestCoeffBits = cu-m_coeffBits;
  m_entropyCoder-store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
  }
 +
 +// Save lossless mode coeff
 +if (bIsLosslessMode)
 +{
 +tmpCu-copyPartFrom(cu, 0, depth, false);
 +}
  }

  X265_CHECK(bestCost != MAX_INT64, no best cost\n);

 +if (bestTransquantBypassFlag  !m_param-bLossless)
 +{
 +assert(log2CUSize  2);
 +cu-setCUTransquantBypassSubParts(true, 0, depth);
 +cu-copyPartFrom(tmpCu, 0, depth, false);
 +}
 +
  if (cu-getQtRootCbf(0))
  outReconYuv-addClip(predYuv, outBestResiYuv, log2CUSize);
  else
 diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h
 --- a/source/Lib/TLibEncoder/TEncSearch.h   Mon Aug 25 17:53:12 2014
 +0900
 +++ b/source/Lib/TLibEncoder/TEncSearch.h   Mon Aug 25 16:41:31 2014
 -0500
 @@ -147,7 +147,7 @@

  /// encode residual and compute rd-cost for inter mode
  void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv,
 TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,
 -   TComYuv* reconYuv);
 +   TComYuv* reconYuv, TComDataCU* tmpCu);
  void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv,
 TComYuv* predYuv, TComYuv* reconYuv);

  void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t
 absPartIdx, TComYuv* fencYuv,
 diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Mon Aug 25 17:53:12 2014 +0900
 +++ b/source/encoder/analysis.cpp   Mon Aug 25 16:41:31 2014 -0500
 @@ -82,7 +82,7 @@
  uint32_t sizeL = cuSize * cuSize;
  uint32_t sizeC = sizeL  (CHROMA_H_SHIFT(csp) +
 CHROMA_V_SHIFT(csp));

 -ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8,
 tqBypass);
 +ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9,
 tqBypass);

  m_interCU_2Nx2N[i]  = new TComDataCU;
  m_interCU_2Nx2N[i]-create(m_memPool[i], numPartitions, cuSize,
 csp, 0, tqBypass);
 @@ -108,6 +108,9 @@
  m_tempCU[i] = new TComDataCU;
  m_tempCU[i]-create(m_memPool[i], numPartitions, cuSize, csp, 7,
 tqBypass);

 +m_tempLosslessCU[i] = new TComDataCU;
 +m_tempLosslessCU[i]-create(m_memPool[i], numPartitions, cuSize,
 csp, 8, tqBypass);
 +
  m_bestPredYuv[i] = new TComYuv;
  ok = m_bestPredYuv[i]-create(cuSize, cuSize, csp);

 @@ -158,6 +161,7 @@
  delete m_bestMergeCU[i];
  delete m_bestCU[i];
  delete m_tempCU[i];
 +delete m_tempLosslessCU[i];

  if (m_bestPredYuv  m_bestPredYuv[i])
  {
 @@ -240,6 +244,7 @@
  // initialize CU data
  m_bestCU[0]-initCU(cu-m_pic, cu-getAddr());
  m_tempCU[0]-initCU(cu-m_pic, cu-getAddr());
 +m_tempLosslessCU[0]-initCU(cu-m_pic, cu-getAddr());

  // analysis of CU
  uint32_t numPartition = cu-getTotalNumPart();
 @@ -394,6 +399,7 @@
  uint32_tnextDepth = depth + 1;
  TComDataCU* subBestPartCU = m_bestCU[nextDepth];
  TComDataCU* subTempPartCU = m_tempCU[nextDepth];
 +TComDataCU* 

Re: [x265] fix m_initSliceContext (uninitialised m_sliceQp)

2014-08-26 Thread Deepthi Nandakumar
Thanks, queued for default (does not apply on stable due to the SAO
refactor).


On Tue, Aug 26, 2014 at 1:55 PM, Satoshi Nakagawa nakagawa...@oki.com
wrote:

 # HG changeset patch
 # User Satoshi Nakagawa nakagawa...@oki.com
 # Date 1409041357 -32400
 #  Tue Aug 26 17:22:37 2014 +0900
 # Node ID c18255467f12da1a780340ade55292c32d95bfdd
 # Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
 fix m_initSliceContext (uninitialised m_sliceQp)

 diff -r 5acfb12ec5d1 -r c18255467f12 source/encoder/frameencoder.cpp
 --- a/source/encoder/frameencoder.cpp   Mon Aug 25 17:53:12 2014 +0900
 +++ b/source/encoder/frameencoder.cpp   Tue Aug 26 17:22:37 2014 +0900
 @@ -158,8 +158,6 @@
  int64_t startCompressTime = x265_mdate();
  Slice* slice = m_frame-m_picSym-m_slice;

 -m_initSliceContext.resetEntropy(slice);
 -
  /* Emit access unit delimiter unless this is the first frame and the
 user is
   * not repeating headers (since AUD is supposed to be the first NAL
 in the access
   * unit) */
 @@ -225,12 +223,15 @@
  m_frameFilter.m_sao.m_refDepth = 2 + !IS_REFERENCED(slice);
  break;
  }
 -m_frameFilter.start(m_frame);

  // Clip slice QP to 0-51 spec range before encoding
  qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
  slice-m_sliceQp = qp;

 +m_initSliceContext.resetEntropy(slice);
 +
 +m_frameFilter.start(m_frame);
 +
  if (m_frame-m_lowres.bKeyframe)
  {
  if (m_param-bEmitHRDSEI)
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Entropy: Replaced getCtxQtCbf() with table

2014-09-02 Thread Deepthi Nandakumar
Is it also possible to change codeQtCbf so that only a uint32_t cbf needs to
be passed in, and not the entire TComDataCU*? This will help since our coming
refactors will get rid of TComDataCU.




On Mon, Sep 1, 2014 at 3:17 PM, Steve Borho st...@borho.org wrote:

 On 09/01, as...@multicorewareinc.com wrote:
  # HG changeset patch
  # User Ashok Kumar Mishraas...@multicorewareinc.com
  # Date 1409562155 -19800
  #  Mon Sep 01 14:32:35 2014 +0530
  # Node ID e7f58267b15c2d6d477bd370f936d00377d2bbc3
  # Parent  4d96eb40f4d6e5cd0883a0a61f20bf00c07ed8f0
  Entropy: Replaced getCtxQtCbf() with table

 nice! but a nit

  diff -r 4d96eb40f4d6 -r e7f58267b15c
 source/Lib/TLibCommon/ContextTables.h
  --- a/source/Lib/TLibCommon/ContextTables.h   Thu Aug 28 13:14:34 2014
 +0530
  +++ b/source/Lib/TLibCommon/ContextTables.h   Mon Sep 01 14:32:35 2014
 +0530
  @@ -144,6 +144,7 @@
   #define NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4  3
   #define NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4  1
 
  +static const uint32_t CtxCbf[3][3] = { { 1, 0, 0 }, { 2, 3, 4 }, { 2,
 3, 4} };

 upper-case names are reserved for classes/structs.

 static const uint32_t ctxCbf[MAX_NUM_CHANNEL_TYPE][3] = ...

   static const uint32_t
 significanceMapContextSetStart[MAX_NUM_CHANNEL_TYPE][3] = { { 0,  9, 21 },
 { 0,  9, 12 } };
   static const uint32_t
 significanceMapContextSetSize[MAX_NUM_CHANNEL_TYPE][3]  = { { 9, 12,  6 },
 { 9,  3,  3 } };
   static const uint32_t
 nonDiagonalScan8x8ContextOffset[MAX_NUM_CHANNEL_TYPE]   = {  6, 0  };
  diff -r 4d96eb40f4d6 -r e7f58267b15c
 source/Lib/TLibEncoder/TEncSearch.cpp
  --- a/source/Lib/TLibEncoder/TEncSearch.cpp   Thu Aug 28 13:14:34 2014
 +0530
  +++ b/source/Lib/TLibEncoder/TEncSearch.cpp   Mon Sep 01 14:32:35 2014
 +0530
  @@ -2798,7 +2798,7 @@
   else
   singleCostY = m_rdCost.calcRdCost(nonZeroDistY,
 singleBitsComp[TEXT_LUMA][0]);
   m_entropyCoder-resetBits();
  -m_entropyCoder-codeQtCbfZero(cu, TEXT_LUMA, trMode);
  +m_entropyCoder-codeQtCbfZero(TEXT_LUMA, trMode);

 unrelated to this patch, but why is depth here stored in trMode/trModeC?

   const uint32_t nullBitsY =
 m_entropyCoder-getNumberOfWrittenBits();
   uint64_t nullCostY = 0;
   if (m_rdCost.m_psyRd)
  @@ -2826,7 +2826,7 @@
   else if (checkTransformSkipY)
   {
   m_entropyCoder-resetBits();
  -m_entropyCoder-codeQtCbfZero(cu, TEXT_LUMA, trMode);
  +m_entropyCoder-codeQtCbfZero(TEXT_LUMA, trMode);
   const uint32_t nullBitsY =
 m_entropyCoder-getNumberOfWrittenBits();
   if (m_rdCost.m_psyRd)
   minCost[TEXT_LUMA][0] = m_rdCost.calcPsyRdCost(distY,
 nullBitsY, psyEnergyY);
  @@ -2898,7 +2898,7 @@
   else
   singleCostU =
 m_rdCost.calcRdCost(nonZeroDistU,
 singleBitsComp[TEXT_CHROMA_U][tuIterator.section]);
   m_entropyCoder-resetBits();
  -m_entropyCoder-codeQtCbfZero(cu,
 TEXT_CHROMA_U, trMode);
  +m_entropyCoder-codeQtCbfZero(TEXT_CHROMA_U,
 trMode);
   const uint32_t nullBitsU =
 m_entropyCoder-getNumberOfWrittenBits();
   uint64_t nullCostU = 0;
   if (m_rdCost.m_psyRd)
  @@ -2926,7 +2926,7 @@
   else if (checkTransformSkipUV)
   {
   m_entropyCoder-resetBits();
  -m_entropyCoder-codeQtCbfZero(cu, TEXT_CHROMA_U,
 trModeC);
  +m_entropyCoder-codeQtCbfZero(TEXT_CHROMA_U,
 trModeC);
   const uint32_t nullBitsU =
 m_entropyCoder-getNumberOfWrittenBits();
   if (m_rdCost.m_psyRd)
   minCost[TEXT_CHROMA_U][tuIterator.section] =
 m_rdCost.calcPsyRdCost(distU, nullBitsU, psyEnergyU);
  @@ -2980,7 +2980,7 @@
   else
   singleCostV =
 m_rdCost.calcRdCost(nonZeroDistV,
 singleBitsComp[TEXT_CHROMA_V][tuIterator.section]);
   m_entropyCoder-resetBits();
  -m_entropyCoder-codeQtCbfZero(cu,
 TEXT_CHROMA_V, trMode);
  +m_entropyCoder-codeQtCbfZero(TEXT_CHROMA_V,
 trMode);
   const uint32_t nullBitsV =
 m_entropyCoder-getNumberOfWrittenBits();
   uint64_t nullCostV = 0;
   if (m_rdCost.m_psyRd)
  @@ -3008,7 +3008,7 @@
   else if (checkTransformSkipUV)
   {
   m_entropyCoder-resetBits();
  -m_entropyCoder-codeQtCbfZero(cu, TEXT_CHROMA_V,
 trModeC);
  +m_entropyCoder-codeQtCbfZero(TEXT_CHROMA_V,
 trModeC);
   const uint32_t nullBitsV =
 m_entropyCoder-getNumberOfWrittenBits();

Re: [x265] [PATCH] fix: hash/binary mismatch for new CU structure holds CU-specific info

2014-09-03 Thread Deepthi Nandakumar
Thanks, Ashok. Queued.


On Wed, Sep 3, 2014 at 8:20 PM, as...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Ashok Kumar Mishraas...@multicorewareinc.com
 # Date 1409753842 -19800
 #  Wed Sep 03 19:47:22 2014 +0530
 # Node ID 00c381bf615840180bb6ce924dc9be84c060938f
 # Parent  62c4779fb0bb35d5d8a69678e9e8aa81272f0115
 fix: hash/binary mismatch for new CU structure holds CU-specific info

 diff -r 62c4779fb0bb -r 00c381bf6158 source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Thu Aug 28 13:14:34 2014 +0530
 +++ b/source/encoder/analysis.cpp   Wed Sep 03 19:47:22 2014 +0530
 @@ -420,13 +420,13 @@
  checkIntra(outBestCU, outTempCU, SIZE_2Nx2N, cu);
  if (depth == g_maxCUDepth)
  {
 -checkIntra(outBestCU, outTempCU, SIZE_NxN, cu);
 +checkIntra(outBestCU, outTempCU, SIZE_NxN, cu);
  }
  else
  {
 -m_entropyCoder-resetBits();
 -m_entropyCoder-codeSplitFlag(outBestCU, 0, depth);
 -outBestCU-m_totalBits +=
 m_entropyCoder-getNumberOfWrittenBits(); // split bits
 +m_entropyCoder-resetBits();
 +m_entropyCoder-codeSplitFlag(outBestCU, 0, depth);
 +outBestCU-m_totalBits +=
 m_entropyCoder-getNumberOfWrittenBits(); // split bits
  }
  if (m_rdCost.m_psyRd)
  outBestCU-m_totalPsyCost =
 m_rdCost.calcPsyRdCost(outBestCU-m_totalDistortion,
 outBestCU-m_totalBits, outBestCU-m_psyEnergy);
 @@ -437,6 +437,7 @@
  // copy original YUV samples in lossless mode
  if (outBestCU-isLosslessCoded(0))
  fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
 +
  // further split
  if (cu_split_flag)
  {
 @@ -445,18 +446,17 @@
  TComDataCU* subTempPartCU = m_tempCU[nextDepth];
  for (uint32_t partUnitIdx = 0; partUnitIdx  4; partUnitIdx++)
  {
 -int qp = outTempCU-getQP(0);
 -subBestPartCU-initSubCU(outTempCU, partUnitIdx, nextDepth,
 qp); // clear sub partition datas or init.
 -if (cu-flags  CU::PRESENT)
 +CU *child_cu = cuPicsym-m_CULocalData + cu-childIdx +
 partUnitIdx;
 +
 +if (child_cu-flags  CU::PRESENT)
  {
 +int qp = outTempCU-getQP(0);
 +subBestPartCU-initSubCU(outTempCU, partUnitIdx,
 nextDepth, qp); // clear sub partition datas or init.
  subTempPartCU-initSubCU(outTempCU, partUnitIdx,
 nextDepth, qp); // clear sub partition datas or init.
  if (0 == partUnitIdx) //initialize RD with previous depth
 buffer

  
 m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
  else

  
 m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
 -CU *child_cu = cuPicsym-m_CULocalData + cu-childIdx +
 partUnitIdx;
 -if (!(child_cu-flags  CU::PRESENT))
 -continue;

  compressIntraCU(subBestPartCU, subTempPartCU, nextDepth,
 bInsidePicture, cuPicsym, child_cu);
  outTempCU-copyPartFrom(subBestPartCU, partUnitIdx,
 nextDepth); // Keep best part data to current temporary data.
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Resolve gcc warnings

2014-09-03 Thread Deepthi Nandakumar
Thanks, pushed.


On Thu, Sep 4, 2014 at 4:06 AM, dtyx...@gmail.com wrote:

 # HG changeset patch
 # User David T Yuen dtyx...@gmail.com
 # Date 1409783716 25200
 # Node ID fed3ddf3f84f3c4e4c72bcc0818a07a99ec7312e
 # Parent  62c4779fb0bb35d5d8a69678e9e8aa81272f0115
 Resolve gcc warnings

 * more parenthesis for macro
 * changed signed to unsigned int

 diff -r 62c4779fb0bb -r fed3ddf3f84f source/common/common.h
 --- a/source/common/common.hThu Aug 28 13:14:34 2014 +0530
 +++ b/source/common/common.hWed Sep 03 15:35:16 2014 -0700
 @@ -291,7 +291,7 @@
  }
  };

 -#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = (bitfield) 
 (~(flag)) | ((~((value) - 1))  (flag))
 +#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) 
 (~(flag))) | ((~((value) - 1))  (flag))
  #define CU_GET_FLAG(bitfield, flag) (!!((bitfield)  (flag)))
  }
  /* defined in common.cpp */
 diff -r 62c4779fb0bb -r fed3ddf3f84f source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Thu Aug 28 13:14:34 2014 +0530
 +++ b/source/encoder/analysis.cpp   Wed Sep 03 15:35:16 2014 -0700
 @@ -258,7 +258,7 @@

  void Analysis::loadCTUData(TComDataCU* parentCU)
  {
 -int8_t cuRange[2]= {MIN_LOG2_CU_SIZE, g_log2Size[m_param-maxCUSize]};
 +uint8_t cuRange[2]= {MIN_LOG2_CU_SIZE,
 g_log2Size[m_param-maxCUSize]};

  // Initialize the coding blocks inside the CTB
  for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx =
 cuRange[0]; rangeIdx--)
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2 of 3] asm: optimize nquant by PSIGND, improve 13k cycles - 11k cycles

2014-09-03 Thread Deepthi Nandakumar
Min,

Praveen has sent a number of patches that change the entire interface for
quant so that the coefficients are now 16-bit instead of 32-bit. Do your
patches still assume they are 32-bit?

Can you review all his patches (8-10 patches) and see if we're moving in
the right direction?

Thanks,
Deepthi



On Thu, Sep 4, 2014 at 5:07 AM, Min Chen chenm...@163.com wrote:

 # HG changeset patch
 # User Min Chen chenm...@163.com
 # Date 1409787419 25200
 # Node ID 4ca9e972f48cb4530ca7181ad7cec351568a99b3
 # Parent  94bd00d1af5d8c5f6f26f97c50a727588a860714
 asm: optimize nquant by PSIGND, improve 13k cycles - 11k cycles

 diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/dct.cpp
 --- a/source/common/dct.cpp Wed Sep 03 16:36:44 2014 -0700
 +++ b/source/common/dct.cpp Wed Sep 03 16:36:59 2014 -0700
 @@ -801,6 +801,10 @@
  {
  uint32_t numSig = 0;

 +X265_CHECK((numCoeff % 16) == 0, number of quant coeff is not
 multiple of 4x4\n);
 +X265_CHECK((uint32_t)add  ((uint32_t)1  qBits), 2 ^ qBits less
 than add\n);
 +X265_CHECK(((intptr_t)quantCoeff  15) == 0, quantCoeff buffer not
 aligned\n);
 +
  for (int blockpos = 0; blockpos  numCoeff; blockpos++)
  {
  int level = coef[blockpos];
 diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/x86/pixel-util8.asm
 --- a/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:44 2014 -0700
 +++ b/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:59 2014 -0700
 @@ -941,55 +941,47 @@
  ; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int
 qBits, int add, int numCoeff);

  
 ;-
  INIT_XMM sse4
 -cglobal nquant, 4,5,8
 +cglobal nquant, 3,5,8
  movdm6, r4m
  mov r4d, r5m
  pxorm7, m7  ; m7 = numZero
 -movdm5, r3d ; m5 = qbits
 +movdm5, r3m ; m5 = qbits
  pshufd  m6, m6, 0   ; m6 = add
  mov r3d, r4d; r3 = numCoeff
  shr r4d, 3
 +
  .loop:
  movum0, [r0]; m0 = level
  movum1, [r0 + 16]   ; m1 = level
 -movum2, [r1]; m2 = qcoeff
 -movum3, [r1 + 16]   ; m3 = qcoeff
 +
 +pabsd   m2, m0
 +pmulld  m2, [r1]; m4 = tmpLevel1
 +paddd   m2, m6
 +psrad   m2, m5  ; m4 = level1
 +psignd  m2, m0  ; restore sign
 +
 +pabsd   m3, m1
 +pmulld  m3, [r1 + 16]   ; m4 = tmpLevel1
 +paddd   m3, m6
 +psrad   m3, m5  ; m4 = level1
 +psignd  m3, m1  ; restore sign
  add r0, 32
  add r1, 32

 -pxorm4, m4
 -pcmpgtd m4, m0  ; m4 = sign
 -pabsd   m0, m0
 -pmulld  m0, m2  ; m0 = tmpLevel1
 -paddd   m0, m6
 -psrad   m0, m5  ; m0 = level1
 -pxorm0, m4
 -psubd   m0, m4
 -
 -pxorm4, m4
 -pcmpgtd m4, m1  ; m4 = sign
 -pabsd   m1, m1
 -pmulld  m1, m3  ; m1 = tmpLevel1
 -paddd   m1, m6
 -psrad   m1, m5  ; m1 = level1
 -pxorm1, m4
 -psubd   m1, m4
 -
 -packssdwm0, m0
 -packssdwm1, m1
 -pmovsxwdm0, m0
 +packssdwm2, m3
 +pmovsxwdm0, m2
 +movhlps m1, m2
  pmovsxwdm1, m1

 -movu[r2], m0
 +movu[r2 ], m0
  movu[r2 + 16], m1
  add r2, 32
 +
 +pxorm4, m4
 +pcmpeqw m2, m4
 +psubw   m7, m2
 +
  dec r4d
 -
 -packssdwm0, m1
 -pxorm4, m4
 -pcmpeqw m0, m4
 -psubw   m7, m0
 -
  jnz .loop

  packuswbm7, m7
 @@ -997,10 +989,8 @@
  mov eax, r3d
  movdr4d, m7
  sub eax, r4d; numSig
 -
  RET

 -

  
 ;-
  ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num,
 int scale, int shift)

  
 ;-

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] frameencoder: remove second encodeCU() pass over CTUs when SAO is disabled

2014-09-08 Thread Deepthi Nandakumar
On Sat, Sep 6, 2014 at 10:08 PM, Steve Borho st...@borho.org wrote:

 # HG changeset patch
 # User Steve Borho st...@borho.org
 # Date 1409932577 -7200
 #  Fri Sep 05 17:56:17 2014 +0200
 # Node ID 07d69bce1760a28be1b1ee1821dfeb3335602422
 # Parent  795878af39730deb24e2ee0e585c625084bb031b
 frameencoder: remove second encodeCU() pass over CTUs when SAO is disabled

 This is a performance optimization, it allows the encoder to generate the
 final
 bitstream of each CTU as it is compressed and cache hot.

 When SAO is enabled, SAO analysis must be performed and coded at the start
 of
 the CTU but SAO analysis currently requires surrounding CTUs to be encoded
 making the second pass unavoidable.

 diff -r 795878af3973 -r 07d69bce1760 source/encoder/frameencoder.cpp
 --- a/source/encoder/frameencoder.cpp   Fri Sep 05 16:03:44 2014 +0200
 +++ b/source/encoder/frameencoder.cpp   Fri Sep 05 17:56:17 2014 +0200
 @@ -192,16 +192,6 @@
  }
  }

 -uint32_t numSubstreams = m_param-bEnableWavefront ?
 m_frame-getPicSym()-getFrameHeightInCU() : 1;
 -if (!m_outStreams)
 -{
 -m_outStreams = new Bitstream[numSubstreams];
 -m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
 -}
 -else
 -for (uint32_t i = 0; i  numSubstreams; i++)
 -m_outStreams[i].resetBits();
 -
  /* Get the QP for this frame from rate control. This call may block
 until
   * frames ahead of it in encode order have called rateControlEnd() */
  int qp = m_top-m_rateControl-rateControlStart(m_frame, m_rce,
 m_top);
 @@ -214,6 +204,24 @@

  m_frameFilter.start(m_frame, m_initSliceContext, qp);

 +// reset entropy coders
 +m_entropyCoder.load(m_initSliceContext);
 +for (int i = 0; i  m_numRows; i++)
 +m_rows[i].init(m_initSliceContext);
 +
 +uint32_t numSubstreams = m_param-bEnableWavefront ?
 m_frame-getPicSym()-getFrameHeightInCU() : 1;
 +if (!m_outStreams)
 +{
 +m_outStreams = new Bitstream[numSubstreams];
 +m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
 +if (!m_param-bEnableSAO)
 +for (uint32_t i = 0; i  numSubstreams; i++)
 +
 m_rows[i].rdEntropyCoders[0][CI_CURR_BEST].setBitstream(m_outStreams[i]);
 +}
 +else
 +for (uint32_t i = 0; i  numSubstreams; i++)
 +m_outStreams[i].resetBits();
 +
  if (m_frame-m_lowres.bKeyframe)
  {
  if (m_param-bEmitHRDSEI)
 @@ -328,7 +336,7 @@
  m_entropyCoder.setBitstream(m_bs);
  m_entropyCoder.codeSliceHeader(slice);

 -// re-encode each row of CUs for the final time (TODO: get rid of
 this second pass)
 +// finish encode of each CTU row
  encodeSlice();

  // serialize each row, record final lengths in slice header
 @@ -409,8 +417,40 @@
  const uint32_t widthInLCUs =
 m_frame-getPicSym()-getFrameWidthInCU();
  const uint32_t lastCUAddr = (slice-m_endCUAddr +
 m_frame-getNumPartInCU() - 1) / m_frame-getNumPartInCU();
  const int numSubstreams = m_param-bEnableWavefront ?
 m_frame-getPicSym()-getFrameHeightInCU() : 1;
 +
 +if (!m_param-bEnableSAO)
 +{
 +/* terminate each row and collect stats */
 +for (uint32_t cuAddr = 0; cuAddr  lastCUAddr; cuAddr++)
 +{
 +uint32_t col = cuAddr % widthInLCUs;
 +
 +if (m_param-bEnableWavefront  col == widthInLCUs - 1)
 +{
 +uint32_t lin = cuAddr / widthInLCUs;
 +uint32_t subStrm = lin % numSubstreams;
 +
 m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1);
 +
 m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish();
 +m_outStreams[subStrm].writeByteAlignment();
 +}
 +
 +// Collect Frame Stats for 2 pass
 +TComDataCU* cu = m_frame-getCU(cuAddr);
 +m_frameStats.mvBits += cu-m_mvBits;
 +m_frameStats.coeffBits += cu-m_coeffBits;
 +m_frameStats.miscBits += cu-m_totalBits - (cu-m_mvBits +
 cu-m_coeffBits);
 +}
 +if (!m_param-bEnableWavefront)
 +{
 +
 m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1);
 +m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish();
 +m_outStreams[0].writeByteAlignment();
 +}
 +
 +return;
 +}
 +
  SAOParam *saoParam = slice-m_pic-getPicSym()-m_saoParam;
 -
  for (uint32_t cuAddr = 0; cuAddr  lastCUAddr; cuAddr++)
  {
  uint32_t col = cuAddr % widthInLCUs;
 @@ -487,11 +527,6 @@
  PPAScopeEvent(FrameEncoder_compressRows);
  Slice* slice = m_frame-m_picSym-m_slice;

 -// reset entropy coders
 -m_entropyCoder.load(m_initSliceContext);
 -for (int i = 0; i  m_numRows; i++)
 -m_rows[i].init(m_initSliceContext);
 -
  m_bAllRowsStop = false;
  m_vbvResetTriggerRow = -1;

 @@ -672,15 +707,17 @@
  }

  if (m_param-bEnableWavefront  col == 0  row  0)
 +{

Re: [x265] [PATCH] search.cpp: fixed type conversion warning

2014-09-09 Thread Deepthi Nandakumar
Thanks, Praveen - an alternative patch pushed to take care of this (silly)
warning.


On Tue, Sep 9, 2014 at 11:29 AM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1410242347 -19800
 # Node ID 5026f08bd7d64ab0ee22dcc98dd034030aa65db9
 # Parent  7510b362ca969d850d33b10acb37f3e5a6d6b7dc
 search.cpp: fixed type conversion warning

 diff -r 7510b362ca96 -r 5026f08bd7d6 source/encoder/search.cpp
 --- a/source/encoder/search.cpp Tue Sep 09 11:07:59 2014 +0530
 +++ b/source/encoder/search.cpp Tue Sep 09 11:29:07 2014 +0530
 @@ -1443,7 +1443,8 @@
  m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]);

  // determine residual for partition
 -puDistY = puCost = 0;
 +puDistY = 0;
 +puCost  = 0;
  xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv,
 predYuv, resiYuv, puDistY, true, puCost, depthRange);

  // check r-d cost
 @@ -1462,7 +1463,8 @@
  m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]);

  // determine residual for partition
 -puDistY = puCost = 0;
 +puDistY = 0;
 +puCost  = 0;
  xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv,
 predYuv, resiYuv, puDistY, false, puCost, depthRange);

  overallDistY += (puCost = bestPUCost) ? bestPUDistY : puDistY;
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] copy_cnt: enable avx2 version of asm code

2014-09-11 Thread Deepthi Nandakumar
Would be better to combine this asm enable with the corresponding asm patch
itself. I have pushed copy_cnt8, and enabled only that for now.

On Wed, Sep 10, 2014 at 3:28 PM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1410343073 -19800
 # Node ID 2cd4a13086740728559fde3a176953e9aa4c0782
 # Parent  7bc4db02ccc728f6e2ddedd036c96e3d37b90f22
 copy_cnt: enable avx2 version of asm code

 diff -r 7bc4db02ccc7 -r 2cd4a1308674 source/common/x86/asm-primitives.cpp
 --- a/source/common/x86/asm-primitives.cpp  Wed Sep 10 14:45:33 2014
 +0530
 +++ b/source/common/x86/asm-primitives.cpp  Wed Sep 10 15:27:53 2014
 +0530
 @@ -1724,14 +1724,10 @@
  p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
  p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_avx2;

 -/* Need to update assembly code as per changed interface of the
 copy_cnt primitive, once
 - * code is updated, avx2 version will be enabled */
 -/*
  p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
  p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
  p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
  p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
 -*/

  p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_avx2;
  p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_avx2;
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] copy_cnt: enable avx2 version of asm code

2014-09-11 Thread Deepthi Nandakumar
Ok, thanks.

On Thu, Sep 11, 2014 at 11:47 AM, Praveen Tiwari 
prav...@multicorewareinc.com wrote:

 You can push 16x16 and 32x32 as well; they perform well, but they
 need a bit more improvement. I will be sending an improvement patch soon.

 Regards,
 Praveen Tiwari

 On Thu, Sep 11, 2014 at 11:29 AM, Deepthi Nandakumar 
 deep...@multicorewareinc.com wrote:

 Would be better to combine this asm enable with the corresponding asm
 patch itself. I have pushed copy_cnt8, and enabled only that for now.

 On Wed, Sep 10, 2014 at 3:28 PM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1410343073 -19800
 # Node ID 2cd4a13086740728559fde3a176953e9aa4c0782
 # Parent  7bc4db02ccc728f6e2ddedd036c96e3d37b90f22
 copy_cnt: enable avx2 version of asm code

 diff -r 7bc4db02ccc7 -r 2cd4a1308674 source/common/x86/asm-primitives.cpp
 --- a/source/common/x86/asm-primitives.cpp  Wed Sep 10 14:45:33 2014
 +0530
 +++ b/source/common/x86/asm-primitives.cpp  Wed Sep 10 15:27:53 2014
 +0530
 @@ -1724,14 +1724,10 @@
  p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
  p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_avx2;

 -/* Need to update assembly code as per changed interface of the
 copy_cnt primitive, once
 - * code is updated, avx2 version will be enabled */
 -/*
  p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
  p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
  p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
  p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
 -*/

  p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_avx2;
  p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_avx2;
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] sao: some cleanups

2014-09-12 Thread Deepthi Nandakumar
Thanks, queued for testing.

On Fri, Sep 12, 2014 at 7:34 AM, Satoshi Nakagawa nakagawa...@oki.com
wrote:

 # HG changeset patch
 # User Satoshi Nakagawa nakagawa...@oki.com
 # Date 1410487314 -32400
 #  Fri Sep 12 11:01:54 2014 +0900
 # Node ID 8a2312df90f99b8b479940141c6dafa4b96581cf
 # Parent  7e29b10982d2eb7fd79f581d6f04184522ba
 sao: some cleanups

 diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/common.h
 --- a/source/common/common.hThu Sep 11 19:24:28 2014 +0530
 +++ b/source/common/common.hFri Sep 12 11:01:54 2014 +0900
 @@ -200,6 +200,8 @@

  namespace x265 {

 +enum { SAO_NUM_OFFSET = 4 };
 +
  // NOTE: MUST be alignment to 16 or 32 bytes for asm code
  struct NoiseReduction
  {
 @@ -215,9 +217,8 @@
  enum { NUM_DOWN_PART = 4 };

  int bestType;
 -int length;
  int subTypeIdx;  // indicates EO class or BO band position
 -int offset[4];
 +int offset[SAO_NUM_OFFSET];
  int startCUX;
  int startCUY;
  int endCUX;
 @@ -245,10 +246,9 @@
  bool mergeLeftFlag;
  int  typeIdx;
  int  subTypeIdx;// indicates EO class or BO band position
 -int  offset[4];
 +int  offset[SAO_NUM_OFFSET];
  int  partIdx;
  int  partIdxTmp;
 -int  length;

  void reset()
  {
 diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/x86/loopfilter.asm
 --- a/source/common/x86/loopfilter.asm  Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/common/x86/loopfilter.asm  Fri Sep 12 11:01:54 2014 +0900
 @@ -44,7 +44,7 @@
  pslldq  m0,15  ; m0 = [iSignLeft x .. x]
  pcmpeqb m4,m4  ; m4 = [pb -1]
  pxorm5,m5  ; m5 = 0
 -movum6,[r1]; m6 = m_iOffsetEo
 +movhm6,[r1]; m6 = m_offsetEo

  .loop:
  movum7,[r0]; m1 = pRec[x]
 diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/entropy.cpp
 --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/entropy.cppFri Sep 12 11:01:54 2014 +0900
 @@ -879,19 +879,19 @@

  if (symbol)
  {
 -if (saoLcuParam-typeIdx  4  compIdx != 2)
 +if (saoLcuParam-typeIdx  SAO_BO  compIdx != 2)
  saoLcuParam-subTypeIdx = saoLcuParam-typeIdx;

  int offsetTh = 1  X265_MIN(X265_DEPTH - 5, 5);
  if (saoLcuParam-typeIdx == SAO_BO)
  {
 -for (i = 0; i  saoLcuParam-length; i++)
 +for (i = 0; i  SAO_BO_LEN; i++)
  {
  uint32_t absOffset = ((saoLcuParam-offset[i]  0) ?
 -saoLcuParam-offset[i] : saoLcuParam-offset[i]);
  codeSaoMaxUvlc(absOffset, offsetTh - 1);
  }

 -for (i = 0; i  saoLcuParam-length; i++)
 +for (i = 0; i  SAO_BO_LEN; i++)
  {
  if (saoLcuParam-offset[i] != 0)
  {
 @@ -903,7 +903,7 @@
  symbol = (uint32_t)(saoLcuParam-subTypeIdx);
  codeSaoUflc(5, symbol);
  }
 -else if (saoLcuParam-typeIdx  4)
 +else // if (saoLcuParam-typeIdx  SAO_BO)
  {
  codeSaoMaxUvlc(saoLcuParam-offset[0], offsetTh - 1);
  codeSaoMaxUvlc(saoLcuParam-offset[1], offsetTh - 1);
 diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.cpp
 --- a/source/encoder/sao.cppThu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/sao.cppFri Sep 12 11:01:54 2014 +0900
 @@ -79,26 +79,13 @@
  341, // level 4
  };

 -const uint32_t SAO::s_eoTable[9] =
 +const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
  {
  1, // 0
  2, // 1
  0, // 2
  3, // 3
 -4, // 4
 -0, // 5
 -0, // 6
 -0, // 7
 -0
 -};
 -
 -const int SAO::s_numClass[MAX_NUM_SAO_TYPE] =
 -{
 -SAO_EO_LEN,
 -SAO_EO_LEN,
 -SAO_EO_LEN,
 -SAO_EO_LEN,
 -SAO_BO_LEN
 +4  // 4
  };

  SAO::SAO()
 @@ -122,8 +109,6 @@
  m_clipTable = NULL;
  m_clipTableBase = NULL;
  m_offsetBo = NULL;
 -m_chromaOffsetBo = NULL;
 -m_tableBo = NULL;
  m_tmpU1[0] = NULL;
  m_tmpU1[1] = NULL;
  m_tmpU1[2] = NULL;
 @@ -162,18 +147,12 @@
   * m_numTotalParts must allow for sufficient storage in any allocated
 arrays */
  m_numTotalParts = X265_MAX(3, s_numCulPartsLevel[m_maxSplitLevel]);

 -int pixelRange = 1  X265_DEPTH;
 -int boRangeShift = X265_DEPTH - SAO_BO_BITS;
 -pixel maxY = (1  X265_DEPTH) - 1;
 -pixel minY = 0;
 -pixel rangeExt = maxY  1;
 +const pixel maxY = (1  X265_DEPTH) - 1;
 +const pixel rangeExt = maxY  1;
  int numLcu = m_numCuInWidth * m_numCuInHeight;

 -CHECKED_MALLOC(m_tableBo, pixel, pixelRange);
 -
 -CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);
 -CHECKED_MALLOC(m_offsetBo,int, maxY + 2 * rangeExt);
 -CHECKED_MALLOC(m_chromaOffsetBo , int, maxY + 2 * rangeExt);
 +CHECKED_MALLOC(m_clipTableBase,  pixel, maxY + 2 * rangeExt);
 +

Re: [x265] [PATCH] Resolved gcc compiler error of mismatched type

2014-09-12 Thread Deepthi Nandakumar
Thanks, pushed.

On Fri, Sep 12, 2014 at 5:57 AM, dtyx...@gmail.com wrote:

 # HG changeset patch
 # User David T Yuen dtyx...@gmail.com
 # Date 1410481540 25200
 # Node ID 37b199156dfc27874205d6d7fadb71a00e3257d9
 # Parent  7e29b10982d2eb7fd79f581d6f04184522ba
 Resolved gcc compiler error of mismatched type

 diff -r 7e29b10982d2 -r 37b199156dfc source/x265.cpp
 --- a/source/x265.cpp   Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/x265.cpp   Thu Sep 11 17:25:40 2014 -0700
 @@ -788,7 +788,7 @@

  void CLIOptions::writeAnalysisFile(x265_picture* pic, x265_param *p)
  {
 -fpos_t seekTo = pic-poc * this-analysisRecordSize;
 +uint64_t seekTo = pic-poc * this-analysisRecordSize;
  fseeko(this-analysisFile, seekTo, SEEK_SET);
  fwrite(p-sourceWidth, sizeof(int), 1, this-analysisFile);
  fwrite(p-sourceHeight, sizeof(int), 1, this-analysisFile);
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH RFC] analysis: add CU specific details to encodeCU()

2014-09-14 Thread Deepthi Nandakumar
On Fri, Sep 12, 2014 at 6:05 PM, santhosh...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Santhoshini Sekar santhosh...@multicorewareinc.com
 # Date 1410525310 -19800
 #  Fri Sep 12 18:05:10 2014 +0530
 # Node ID bf4ebe5df0cab013e4462597b55bd505b2a6a71a
 # Parent  7e29b10982d2eb7fd79f581d6f04184522ba
 analysis: add CU specific details to encodeCU()

 diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/analysis.cpp   Fri Sep 12 18:05:10 2014 +0530
 @@ -301,7 +301,6 @@
  {
  if (cu-m_slice-m_pps-bUseDQP)
  m_bEncodeDQP = true;
 -loadCTUData(cu);

  // initialize CU data
  m_bestCU[0]-initCU(cu-m_pic, cu-getAddr());
 diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/entropy.cpp
 --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/entropy.cppFri Sep 12 18:05:10 2014 +0530
 @@ -481,14 +481,14 @@
  }
  }

 -void Entropy::encodeCTU(TComDataCU* cu)
 +void Entropy::encodeCTU(TComDataCU* cu, CU* cuData)
  {
  bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP;
 -encodeCU(cu, 0, 0, false, bEncodeDQP);
 +encodeCU(cu, 0, 0, bEncodeDQP, cuData);
  }

  /* encode a CU block recursively */
 -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t
 depth, bool bInsidePicture, bool bEncodeDQP)
 +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t
 depth, bool bEncodeDQP, CU* cuData)
  {
  Frame* pic = cu-m_pic;
  Slice* slice = cu-m_slice;
 @@ -496,30 +496,26 @@
  if (depth = slice-m_pps-maxCuDQPDepth  slice-m_pps-bUseDQP)
  bEncodeDQP = true;

 -if (!bInsidePicture)
 +int cu_split_flag = !(cuData-flags  CU::LEAF);
 +int cu_unsplit_flag = !(cuData-flags  CU::SPLIT_MANDATORY);
 +
 +uint32_t xmax = slice-m_sps-picWidthInLumaSamples  -
 cu-getCUPelX();
 +uint32_t ymax = slice-m_sps-picHeightInLumaSamples -
 cu-getCUPelY();
 +
 +if (!cu_unsplit_flag)
  {
 -uint32_t xmax = slice-m_sps-picWidthInLumaSamples  -
 cu-getCUPelX();
 -uint32_t ymax = slice-m_sps-picHeightInLumaSamples -
 cu-getCUPelY();
 -uint32_t cuSize = g_maxCUSize  depth;
 -
 -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax 
 -  g_zscanToPelY[absPartIdx] + cuSize = ymax);
 -
 -if (!bInsidePicture)
 +uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1))  2;
 +for (uint32_t partUnitIdx = 0; partUnitIdx  4; partUnitIdx++,
 absPartIdx += qNumParts)
  {
 -uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1))
  2;
 -for (uint32_t partUnitIdx = 0; partUnitIdx  4;
 partUnitIdx++, absPartIdx += qNumParts)
 -{
 -if (g_zscanToPelX[absPartIdx]  xmax 
 g_zscanToPelY[absPartIdx]  ymax)
 -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture,
 bEncodeDQP);
 -}
 -
 -return;
 +CU *childCU = cu-m_CULocalData + cuData-childIdx +
 partUnitIdx;
 +if (g_zscanToPelX[absPartIdx]  xmax 
 g_zscanToPelY[absPartIdx]  ymax)


Most of the patch looks correct, but can't the above if-check also be replaced
by one of the childCU flags?


 +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU);
  }
 +return;
  }

  // We need to split, so don't try these modes.
 -if (bInsidePicture  depth  g_maxCUDepth)
 +if (cu_split_flag)
  codeSplitFlag(cu, absPartIdx, depth);

  if (depth  cu-getDepth(absPartIdx)  depth  g_maxCUDepth)
 @@ -527,7 +523,10 @@
  uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1))  2;

  for (uint32_t partUnitIdx = 0; partUnitIdx  4; partUnitIdx++,
 absPartIdx += qNumParts)
 -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture,
 bEncodeDQP);
 +{
 +CU *childCU = cu-m_CULocalData + cuData-childIdx +
 partUnitIdx;
 +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU);
 +}
  return;
  }

 diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/entropy.h
 --- a/source/encoder/entropy.h  Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/entropy.h  Fri Sep 12 18:05:10 2014 +0530
 @@ -148,7 +148,7 @@
  void codeShortTermRefPicSet(RPS* rps);
  void finishSlice() { encodeBinTrm(1); finish();
 dynamic_castBitstream*(m_bitIf)-writeByteAlignment(); }

 -void encodeCTU(TComDataCU* cu);
 +void encodeCTU(TComDataCU* cu, CU *cuData);
  void codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
  void codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int
 ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int
 allowMergeLeft, int allowMergeUp);
  void codeSaoMerge(uint32_t code)   { encodeBin(code,
 m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
 @@ -193,7 +193,7 @@
  void 

Re: [x265] [PATCH] Search: remove redundant encode coefficients in intra for performance

2014-09-14 Thread Deepthi Nandakumar
This significantly changes outputs for P and B frames. Higher bitrates and
higher SSIM. Let's do full regression testing on this - and compare the
bitrate/ssim for all combinations to be reasonably sure there are no bugs.

On Fri, Sep 12, 2014 at 7:47 PM, as...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Ashok Kumar Mishraas...@multicorewareinc.com
 # Date 1410341620 -19800
 #  Wed Sep 10 15:03:40 2014 +0530
 # Node ID d8be3c38915d4a628b804522da8946a152041203
 # Parent  cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f
 Search: remove redundant encode coefficients in intra for performance

 diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Thu Sep 11 17:25:40 2014 -0700
 +++ b/source/encoder/analysis.cpp   Wed Sep 10 15:03:40 2014 +0530
 @@ -1840,6 +1840,7 @@
  void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv,
 TComYuv* predYuv,  ShortYuv* outResiYuv, TComYuv* outReconYuv)
  {
  uint64_t puCost = 0;
 +uint32_t puBits = 0;
  uint32_t depth = cu-getDepth(0);
  uint32_t initTrDepth = cu-getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;

 @@ -1851,7 +1852,7 @@
  uint32_t tuDepthRange[2];
  cu-getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);

 -uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
 predYuv, outResiYuv, false, puCost, tuDepthRange);
 +uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
 predYuv, outResiYuv, false, puCost, puBits, tuDepthRange);
  xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv);

  //=== update PU data 
 diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp
 --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700
 +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530
 @@ -111,47 +111,6 @@
  return false;
  }

 -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t depthRange[2])
 -{
 -uint32_t fullDepth  = cu-getDepth(0) + trDepth;
 -uint32_t trMode = cu-getTransformIdx(absPartIdx);
 -uint32_t subdiv = (trMode  trDepth ? 1 : 0);
 -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
 -
 -if (cu-getPredictionMode(0) == MODE_INTRA  cu-getPartitionSize(0)
 == SIZE_NxN  trDepth == 0)
 -{
 -X265_CHECK(subdiv, subdivision not present\n);
 -}
 -else if (log2TrSize  *(depthRange + 1))
 -{
 -X265_CHECK(subdiv, subdivision not present\n);
 -}
 -else if (log2TrSize == cu-m_slice-m_sps-quadtreeTULog2MinSize)
 -{
 -X265_CHECK(!subdiv, subdivision present\n);
 -}
 -else if (log2TrSize == *depthRange)
 -{
 -X265_CHECK(!subdiv, subdivision present\n);
 -}
 -else
 -{
 -X265_CHECK(log2TrSize  *depthRange, transform size too
 small\n);
 -m_entropyCoder-codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
 -}
 -
 -if (subdiv)
 -{
 -uint32_t qtPartNum = cu-m_pic-getNumPartInCU()  ((fullDepth +
 1)  1);
 -for (uint32_t part = 0; part  4; part++)
 -xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part *
 qtPartNum, depthRange);
 -
 -return;
 -}
 -
 -m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
 -}
 -
  void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t
 height)
  {
  uint32_t fullDepth  = cu-getDepth(0) + trDepth;
 @@ -183,32 +142,6 @@
  }
  }

 -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t
 absPartIdx)
 -{
 -const TextType ttype = TEXT_LUMA;
 -
 -if (!cu-getCbf(absPartIdx, ttype, trDepth))
 -return;
 -
 -uint32_t fullDepth = cu-getDepth(0) + trDepth;
 -uint32_t trMode= cu-getTransformIdx(absPartIdx);
 -
 -if (trMode  trDepth)
 -{
 -uint32_t qtPartNum = cu-m_pic-getNumPartInCU()  ((fullDepth +
 1)  1);
 -for (uint32_t part = 0; part  4; part++)
 -xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part *
 qtPartNum);
 -
 -return;
 -}
 -
 -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
 -uint32_t qtLayer= log2TrSize - 2;
 -uint32_t coeffOffset = absPartIdx  LOG2_UNIT_SIZE * 2;
 -coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
 -m_entropyCoder-codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize,
 ttype);
 -}
 -
  void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t
 absPartIdx, TextType ttype)
  {
  if (!cu-getCbf(absPartIdx, ttype, trDepth))
 @@ -316,15 +249,6 @@
  }
  }

 -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t depthRange[2])
 -{
 -m_entropyCoder-resetBits();
 -xEncIntraHeaderLuma(cu, trDepth, absPartIdx);
 -xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange);
 -xEncCoeffQTLuma(cu, trDepth, absPartIdx);
 -return 

Re: [x265] [PATCH] Search: remove redundant encode coefficients in intra for performance

2014-09-15 Thread Deepthi Nandakumar
Sorry, the output mismatch was due to asm. Pushed.

On Sun, Sep 14, 2014 at 4:35 PM, Deepthi Nandakumar 
deep...@multicorewareinc.com wrote:

 This significantly changes outputs for P and B frames. Higher bitrates and
 higher SSIM. Let's do full regression testing on this - and compare the
 bitrate/ssim for all combinations to be reasonably sure there are no bugs.

 On Fri, Sep 12, 2014 at 7:47 PM, as...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Ashok Kumar Mishraas...@multicorewareinc.com
 # Date 1410341620 -19800
 #  Wed Sep 10 15:03:40 2014 +0530
 # Node ID d8be3c38915d4a628b804522da8946a152041203
 # Parent  cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f
 Search: remove redundant encode coefficients in intra for performance

 diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Thu Sep 11 17:25:40 2014 -0700
 +++ b/source/encoder/analysis.cpp   Wed Sep 10 15:03:40 2014 +0530
 @@ -1840,6 +1840,7 @@
  void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv,
 TComYuv* predYuv,  ShortYuv* outResiYuv, TComYuv* outReconYuv)
  {
  uint64_t puCost = 0;
 +uint32_t puBits = 0;
  uint32_t depth = cu-getDepth(0);
  uint32_t initTrDepth = cu-getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;

 @@ -1851,7 +1852,7 @@
  uint32_t tuDepthRange[2];
  cu-getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);

 -uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
 predYuv, outResiYuv, false, puCost, tuDepthRange);
 +uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
 predYuv, outResiYuv, false, puCost, puBits, tuDepthRange);
  xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv);

  //=== update PU data 
 diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp
 --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700
 +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530
 @@ -111,47 +111,6 @@
  return false;
  }

 -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t depthRange[2])
 -{
 -uint32_t fullDepth  = cu-getDepth(0) + trDepth;
 -uint32_t trMode = cu-getTransformIdx(absPartIdx);
 -uint32_t subdiv = (trMode  trDepth ? 1 : 0);
 -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
 -
 -if (cu-getPredictionMode(0) == MODE_INTRA 
 cu-getPartitionSize(0) == SIZE_NxN  trDepth == 0)
 -{
 -X265_CHECK(subdiv, subdivision not present\n);
 -}
 -else if (log2TrSize  *(depthRange + 1))
 -{
 -X265_CHECK(subdiv, subdivision not present\n);
 -}
 -else if (log2TrSize == cu-m_slice-m_sps-quadtreeTULog2MinSize)
 -{
 -X265_CHECK(!subdiv, subdivision present\n);
 -}
 -else if (log2TrSize == *depthRange)
 -{
 -X265_CHECK(!subdiv, subdivision present\n);
 -}
 -else
 -{
 -X265_CHECK(log2TrSize  *depthRange, transform size too
 small\n);
 -m_entropyCoder-codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
 -}
 -
 -if (subdiv)
 -{
 -uint32_t qtPartNum = cu-m_pic-getNumPartInCU()  ((fullDepth
 + 1)  1);
 -for (uint32_t part = 0; part  4; part++)
 -xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part *
 qtPartNum, depthRange);
 -
 -return;
 -}
 -
 -m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
 -}
 -
  void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t
 height)
  {
  uint32_t fullDepth  = cu-getDepth(0) + trDepth;
 @@ -183,32 +142,6 @@
  }
  }

 -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t
 absPartIdx)
 -{
 -const TextType ttype = TEXT_LUMA;
 -
 -if (!cu-getCbf(absPartIdx, ttype, trDepth))
 -return;
 -
 -uint32_t fullDepth = cu-getDepth(0) + trDepth;
 -uint32_t trMode= cu-getTransformIdx(absPartIdx);
 -
 -if (trMode  trDepth)
 -{
 -uint32_t qtPartNum = cu-m_pic-getNumPartInCU()  ((fullDepth
 + 1)  1);
 -for (uint32_t part = 0; part  4; part++)
 -xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part *
 qtPartNum);
 -
 -return;
 -}
 -
 -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
 -uint32_t qtLayer= log2TrSize - 2;
 -uint32_t coeffOffset = absPartIdx  LOG2_UNIT_SIZE * 2;
 -coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
 -m_entropyCoder-codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize,
 ttype);
 -}
 -
  void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, TextType ttype)
  {
  if (!cu-getCbf(absPartIdx, ttype, trDepth))
 @@ -316,15 +249,6 @@
  }
  }

 -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth,
 uint32_t absPartIdx, uint32_t depthRange[2])
 -{
 -m_entropyCoder-resetBits();
 -xEncIntraHeaderLuma(cu, trDepth, absPartIdx

Re: [x265] [PATCH] analysis: add CU specific details to encodeCU()

2014-09-15 Thread Deepthi Nandakumar
Thanks, looks good.

On Tue, Sep 16, 2014 at 9:45 AM, santhosh...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Santhoshini Sekar santhosh...@multicorewareinc.com
 # Date 1410840429 -19800
 #  Tue Sep 16 09:37:09 2014 +0530
 # Node ID 50505472d3e33b775c70f2f373e1c15d17e47e66
 # Parent  7e29b10982d2eb7fd79f581d6f04184522ba
 analysis: add CU specific details to encodeCU()

 diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/analysis.cpp   Tue Sep 16 09:37:09 2014 +0530
 @@ -301,7 +301,6 @@
  {
  if (cu-m_slice-m_pps-bUseDQP)
  m_bEncodeDQP = true;
 -loadCTUData(cu);

  // initialize CU data
  m_bestCU[0]-initCU(cu-m_pic, cu-getAddr());
 diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.cpp
 --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/entropy.cppTue Sep 16 09:37:09 2014 +0530
 @@ -481,14 +481,14 @@
  }
  }

 -void Entropy::encodeCTU(TComDataCU* cu)
 +void Entropy::encodeCTU(TComDataCU* cu, CU* cuData)
  {
  bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP;
 -encodeCU(cu, 0, 0, false, bEncodeDQP);
 +encodeCU(cu, 0, 0, bEncodeDQP, cuData);
  }

  /* encode a CU block recursively */
 -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t
 depth, bool bInsidePicture, bool bEncodeDQP)
 +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t
 depth, bool bEncodeDQP, CU* cuData)
  {
  Frame* pic = cu-m_pic;
  Slice* slice = cu-m_slice;
 @@ -496,30 +496,24 @@
  if (depth = slice-m_pps-maxCuDQPDepth  slice-m_pps-bUseDQP)
  bEncodeDQP = true;

 -if (!bInsidePicture)
 +int cuSplitFlag = !(cuData-flags  CU::LEAF);
 +int cuUnsplitFlag = !(cuData-flags  CU::SPLIT_MANDATORY);
 +
 +if (!cuUnsplitFlag)
  {
 -uint32_t xmax = slice-m_sps-picWidthInLumaSamples  -
 cu-getCUPelX();
 -uint32_t ymax = slice-m_sps-picHeightInLumaSamples -
 cu-getCUPelY();
 -uint32_t cuSize = g_maxCUSize  depth;
 -
 -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax 
 -  g_zscanToPelY[absPartIdx] + cuSize = ymax);
 -
 -if (!bInsidePicture)
 +uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1))  2;
 +for (uint32_t partUnitIdx = 0; partUnitIdx  4; partUnitIdx++,
 absPartIdx += qNumParts)
  {
 -uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1))
  2;
 -for (uint32_t partUnitIdx = 0; partUnitIdx  4;
 partUnitIdx++, absPartIdx += qNumParts)
 -{
 -if (g_zscanToPelX[absPartIdx]  xmax 
 g_zscanToPelY[absPartIdx]  ymax)
 -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture,
 bEncodeDQP);
 -}
 -
 -return;
 +CU *childCU = cu-m_CULocalData + cuData-childIdx +
 partUnitIdx;
 +int cuPresentFlagChild = !(childCU-flags  CU::PRESENT);
 +if (!cuPresentFlagChild)
 +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU);
  }
 +return;
  }

  // We need to split, so don't try these modes.
 -if (bInsidePicture  depth  g_maxCUDepth)
 +if (cuSplitFlag)
  codeSplitFlag(cu, absPartIdx, depth);

  if (depth  cu-getDepth(absPartIdx)  depth  g_maxCUDepth)
 @@ -527,7 +521,10 @@
  uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1))  2;

  for (uint32_t partUnitIdx = 0; partUnitIdx  4; partUnitIdx++,
 absPartIdx += qNumParts)
 -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture,
 bEncodeDQP);
 +{
 +CU *childCU = cu-m_CULocalData + cuData-childIdx +
 partUnitIdx;
 +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU);
 +}
  return;
  }

 diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.h
 --- a/source/encoder/entropy.h  Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/entropy.h  Tue Sep 16 09:37:09 2014 +0530
 @@ -148,7 +148,7 @@
  void codeShortTermRefPicSet(RPS* rps);
  void finishSlice() { encodeBinTrm(1); finish();
 dynamic_castBitstream*(m_bitIf)-writeByteAlignment(); }

 -void encodeCTU(TComDataCU* cu);
 +void encodeCTU(TComDataCU* cu, CU *cuData);
  void codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
  void codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int
 ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int
 allowMergeLeft, int allowMergeUp);
  void codeSaoMerge(uint32_t code)   { encodeBin(code,
 m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
 @@ -193,7 +193,7 @@
  void encodeBinsEP(uint32_t binValues, int numBins);
  void encodeBinTrm(uint32_t binValue);

 -void encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth,
 bool bInsidePicture, bool bEncodeDQP);
 +void 

Re: [x265] [PATCH] analysis: add CU specific details to encodeCU()

2014-09-16 Thread Deepthi Nandakumar
Ok, thanks. Please send a follow-on patch cleaning up both the encodeCU and
compressCu functions.

On Tue, Sep 16, 2014 at 11:32 AM, Santhoshini Sekar 
santhosh...@multicorewareinc.com wrote:



 On Tue, Sep 16, 2014 at 10:56 AM, Deepthi Nandakumar 
 deep...@multicorewareinc.com wrote:



 On Tue, Sep 16, 2014 at 9:45 AM, santhosh...@multicorewareinc.com
 wrote:

 # HG changeset patch
 # User Santhoshini Sekar santhosh...@multicorewareinc.com
 # Date 1410840429 -19800
 #  Tue Sep 16 09:37:09 2014 +0530
 # Node ID 50505472d3e33b775c70f2f373e1c15d17e47e66
 # Parent  7e29b10982d2eb7fd79f581d6f04184522ba
 analysis: add CU specific details to encodeCU()

 diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp   Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/analysis.cpp   Tue Sep 16 09:37:09 2014 +0530
 @@ -301,7 +301,6 @@
  {
  if (cu-m_slice-m_pps-bUseDQP)
  m_bEncodeDQP = true;
 -loadCTUData(cu);

  // initialize CU data
  m_bestCU[0]-initCU(cu-m_pic, cu-getAddr());
 diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.cpp
 --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/entropy.cppTue Sep 16 09:37:09 2014 +0530
 @@ -481,14 +481,14 @@
  }
  }

 -void Entropy::encodeCTU(TComDataCU* cu)
 +void Entropy::encodeCTU(TComDataCU* cu, CU* cuData)
  {
  bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP;
 -encodeCU(cu, 0, 0, false, bEncodeDQP);
 +encodeCU(cu, 0, 0, bEncodeDQP, cuData);
  }

  /* encode a CU block recursively */
 -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t
 depth, bool bInsidePicture, bool bEncodeDQP)
 +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t
 depth, bool bEncodeDQP, CU* cuData)
  {
  Frame* pic = cu-m_pic;
  Slice* slice = cu-m_slice;
 @@ -496,30 +496,24 @@
  if (depth = slice-m_pps-maxCuDQPDepth  slice-m_pps-bUseDQP)
  bEncodeDQP = true;

 -if (!bInsidePicture)
 +int cuSplitFlag = !(cuData-flags  CU::LEAF);
 +int cuUnsplitFlag = !(cuData-flags  CU::SPLIT_MANDATORY);
 +
 +if (!cuUnsplitFlag)
  {
 -uint32_t xmax = slice-m_sps-picWidthInLumaSamples  -
 cu-getCUPelX();
 -uint32_t ymax = slice-m_sps-picHeightInLumaSamples -
 cu-getCUPelY();
 -uint32_t cuSize = g_maxCUSize  depth;
 -
 -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax 
 -  g_zscanToPelY[absPartIdx] + cuSize = ymax);
 -
 -if (!bInsidePicture)
 +uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1)) 
 2;
 +for (uint32_t partUnitIdx = 0; partUnitIdx  4; partUnitIdx++,
 absPartIdx += qNumParts)
  {
 -uint32_t qNumParts = (pic-getNumPartInCU()  (depth 
 1))  2;
 -for (uint32_t partUnitIdx = 0; partUnitIdx  4;
 partUnitIdx++, absPartIdx += qNumParts)
 -{
 -if (g_zscanToPelX[absPartIdx]  xmax 
 g_zscanToPelY[absPartIdx]  ymax)
 -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture,
 bEncodeDQP);
 -}
 -
 -return;
 +CU *childCU = cu-m_CULocalData + cuData-childIdx +
 partUnitIdx;
 +int cuPresentFlagChild = !(childCU-flags  CU::PRESENT);
 +if (!cuPresentFlagChild)
 +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP,
 childCU);
  }
 +return;
  }

  // We need to split, so don't try these modes.
 -if (bInsidePicture  depth  g_maxCUDepth)
 +if (cuSplitFlag)
  codeSplitFlag(cu, absPartIdx, depth);

  if (depth  cu-getDepth(absPartIdx)  depth  g_maxCUDepth)
 @@ -527,7 +521,10 @@
  uint32_t qNumParts = (pic-getNumPartInCU()  (depth  1)) 
 2;

  for (uint32_t partUnitIdx = 0; partUnitIdx  4; partUnitIdx++,
 absPartIdx += qNumParts)
 -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture,
 bEncodeDQP);
 +{
 +CU *childCU = cu-m_CULocalData + cuData-childIdx +
 partUnitIdx;
 +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU);
 +}
  return;
  }

 diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.h
 --- a/source/encoder/entropy.h  Thu Sep 11 19:24:28 2014 +0530
 +++ b/source/encoder/entropy.h  Tue Sep 16 09:37:09 2014 +0530
 @@ -148,7 +148,7 @@
  void codeShortTermRefPicSet(RPS* rps);
  void finishSlice() { encodeBinTrm(1); finish();
 dynamic_castBitstream*(m_bitIf)-writeByteAlignment(); }

 -void encodeCTU(TComDataCU* cu);
 +void encodeCTU(TComDataCU* cu, CU *cuData);
  void codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
  void codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int
 ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int
 allowMergeLeft, int allowMergeUp);
  void codeSaoMerge(uint32_t code)   { encodeBin(code,
 m_contextState

  1   2   3   4   >