Re: [x265] Some warnings about YUV 4:2:2 chroma code

2014-04-15 Thread Mario *LigH* Rohkrämer

Resolved by patch 6710 (08d64a70594e).

--

Fun and success!
Mario *LigH* Rohkrämer
mailto:cont...@ligh.de

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate

2014-04-15 Thread Min Chen
# HG changeset patch
# User Min Chen chenm...@163.com
# Date 1397561438 -28800
# Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a
# Parent  1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb
cleanup: reduce data size and dependency on MotionEstimate

diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800
@@ -111,8 +111,6 @@
 m_rdCost  = rdCost;
 
 initTempBuff(cfg-param-internalCsp);
-m_me.setSearchMethod(cfg-param-searchMethod);
-m_me.setSubpelRefine(cfg-param-subpelRefine);
 
 /* When frame parallelism is active, only 'refLagPixels' of reference 
frames will be guaranteed
  * available for motion reference.  See refLagRows in 
FrameEncoder::compressCTURows() */
@@ -2191,7 +2189,7 @@
 cu-getCUMvField(REF_PIC_LIST_1)-m_refIdx[m.absPartIdx] = 
m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx;
 
 motionCompensation(cu, m_predTempYuv, REF_PIC_LIST_X, puIdx, true, 
false);
-uint32_t costCand = 
m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), 
m_predTempYuv.getStride());
+uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, 
m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());
 uint32_t bitsCand = mergeCand + 1;
 if (mergeCand == m_cfg-param-maxNumMergeCand - 1)
 {
@@ -2314,7 +2312,7 @@
 cu-clipMv(mvCand);
 
 xPredInterLumaBlk(cu, cu-getSlice()-getRefPic(l, 
ref)-getPicYuvRec(), partAddr, mvCand, roiWidth, roiHeight, m_predTempYuv);
-uint32_t cost = 
m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
+uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, 
m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
 cost = m_rdCost-calcRdSADCost(cost, MVP_IDX_BITS);
 
 if (bestCost  cost)
@@ -2328,11 +2326,11 @@
 
 int merange = m_cfg-param-searchRange;
 xSetSearchRange(cu, mvp, merange, mvmin, mvmax);
-int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, 
mvmax, mvp, numMvc, mvc, merange, outmv);
+int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, 
mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg-param-searchMethod, 
m_cfg-param-subpelRefine);
 
 /* Get total cost of partition, but only include MV bit cost 
once */
-bits += m_me.bitcost(outmv);
-uint32_t cost = (satdCost - m_me.mvcost(outmv)) + 
m_rdCost-getCost(bits);
+bits += m_me.bitcost(outmv, mvp);
+uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + 
m_rdCost-getCost(bits);
 
 /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
 xCheckBestMVP(amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, 
cost);
@@ -2368,7 +2366,7 @@
 
 int partEnum = partitionFromSizes(roiWidth, roiHeight);
 primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, 
m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32);
-int satdCost = m_me.bufSATD(avg, roiWidth);
+int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);
 
 bidirBits = list[0].bits + list[1].bits + listSelBits[2] - 
(listSelBits[0] + listSelBits[1]);
 bidirCost = satdCost + m_rdCost-getCost(bidirBits);
@@ -2397,17 +2395,15 @@
 intptr_t refStride = m_mref[0][0]-lumaStride;
 
 primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, 
refStride, ref1, refStride, 32);
-satdCost = m_me.bufSATD(avg, roiWidth);
+satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);
 
 MV mvp0 = list[0].mvp;
 int mvpIdx0 = list[0].mvpIdx;
-m_me.setMVP(mvp0);
-uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + 
m_me.bitcost(mvzero);
+uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, mvp0) 
+ m_me.bitcost(mvzero, mvp0);
 
 MV mvp1 = list[1].mvp;
 int mvpIdx1 = list[1].mvpIdx;
-m_me.setMVP(mvp1);
-uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + 
m_me.bitcost(mvzero);
+uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, mvp1) 
+ m_me.bitcost(mvzero, mvp1);
 
 uint32_t cost = satdCost + m_rdCost-getCost(bits0) + 
m_rdCost-getCost(bits1);
 
@@ -2556,9 +2552,8 @@
 {
 assert(amvpInfo-m_mvCand[outMvpIdx] == mvPred);
 
-m_me.setMVP(mvPred);
 int bestMvpIdx = outMvpIdx;
-int mvBitsOrig = m_me.bitcost(mv) + MVP_IDX_BITS;
+int mvBitsOrig = m_me.bitcost(mv, mvPred) + MVP_IDX_BITS;
 int bestMvBits = mvBitsOrig;
 
 for (int mvpIdx = 0; mvpIdx  AMVP_MAX_NUM_CANDS; mvpIdx++)
@@ 

Re: [x265] fix: constrained intra

2014-04-15 Thread Deepthi Nandakumar
Thanks, verified and pushed. Do you mind adding some more information on
this option to the docs? This only affects intra decisions in inter-slices,
I think?

Deepthi


On Tue, Apr 15, 2014 at 10:16 AM, Satoshi Nakagawa nakagawa...@oki.com wrote:

  Was it generating hash mistakes without these fixes?

 Yes, when encoding with the --constrained-intra option.

  -Original Message-
  From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
  Steve Borho
  Sent: Tuesday, April 15, 2014 1:29 PM
  To: Development for x265
  Subject: Re: [x265] fix: constrained intra
 
  On Mon, Apr 14, 2014 at 10:36 PM, Satoshi Nakagawa nakagawa...@oki.com
  wrote:
   # HG changeset patch
   # User Satoshi Nakagawa nakagawa...@oki.com # Date 1397532876 -32400
   #  Tue Apr 15 12:34:36 2014 +0900
   # Node ID 5dde9f4817813e96116df5b86925fa3fc5eff2a8
   # Parent  08d64a70594ed31cd80046bd4a7e9fa52119be47
   fix: constrained intra
 
  Was it generating hash mistakes without these fixes?
 
  
   diff -r 08d64a70594e -r 5dde9f481781
  source/Lib/TLibCommon/TComPattern.cpp
   --- a/source/Lib/TLibCommon/TComPattern.cpp Mon Apr 14 13:18:18
  2014 -0500
   +++ b/source/Lib/TLibCommon/TComPattern.cpp Tue Apr 15 12:34:36
  2014 +0900
   @@ -79,12 +79,24 @@
int  leftUnits   = cuHeightInUnits  1;
partIdxLB=
  g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) *
  partIdxStride)];
  
   -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
   -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
  (bNeighborFlags + leftUnits + 1));
   -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
 partIdxRT,
  (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits - 1));
   -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
   +if (!cu-getSlice()-getPPS()-getConstrainedIntraPred())
   +{
   +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu,
  partIdxLT);
   +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
  (bNeighborFlags + leftUnits + 1));
   +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
  partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits - 1));
   +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT,
  partIdxLB, (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
   +}
   +else
   +{
   +bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu,
  partIdxLT);
   +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   +numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT,
  partIdxRT, (bNeighborFlags + leftUnits + 1));
   +numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT,
  partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   +numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT,
  partIdxLB, (bNeighborFlags + leftUnits - 1));
   +numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT,
  partIdxLB, (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
   +}
  
width = cuWidth2 + 1;
height = cuHeight2 + 1;
   @@ -238,12 +250,24 @@
int  leftUnits   = cuHeightInUnits  1;
partIdxLB=
  g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) *
  partIdxStride)];
  
   -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
   -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
  (bNeighborFlags + leftUnits + 1));
   -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
 partIdxRT,
  (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits - 1));
   -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
   +if (!cu-getSlice()-getPPS()-getConstrainedIntraPred())
   +{
   +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu,
  partIdxLT);
   +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
   +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
  (bNeighborFlags + leftUnits + 1));
   +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
  partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
   +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
  (bNeighborFlags + leftUnits - 1));
   +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT,
  

Re: [x265] fix: constrained intra

2014-04-15 Thread Steve Borho
On Tue, Apr 15, 2014 at 11:26 AM, Deepthi Nandakumar
deep...@multicorewareinc.com wrote:
 Thanks, verified and pushed. Do you mind including some more information on
 this option to the docs? This only affects intra decisions in inter-slices,
 I think?

As far as I know, constrained intra in HEVC is the same as it was in
AVC.  When generating intra predictions in inter slices, only
intra-coded reference pixels are used.  Inter-coded reference pixels
are replaced with intra-coded neighbor pixels or default values.  The
general idea is to block the propagation of reference errors that may
have resulted from lossy signals.

If anyone has a better description, please speak up.

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] vbv: clear row diagonal Satd costs, cu Satd costs when vbv row reset is triggered

2014-04-15 Thread Steve Borho
On Tue, Apr 15, 2014 at 11:34 AM,  aar...@multicorewareinc.com wrote:
 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1397579661 -19800
 #  Tue Apr 15 22:04:21 2014 +0530
 # Node ID bf48002755a3f5593732ca039ad38a3c799da808
 # Parent  0a95a6bb0f8e71d7a7d0f8e3803ec2878ad558fe
 vbv: clear row diagonal Satd costs, cu Satd costs when vbv row reset is 
 triggered.

 diff -r 0a95a6bb0f8e -r bf48002755a3 source/encoder/frameencoder.cpp
 --- a/source/encoder/frameencoder.cpp   Tue Apr 15 12:34:36 2014 +0900
 +++ b/source/encoder/frameencoder.cpp   Tue Apr 15 22:04:21 2014 +0530
 @@ -1192,6 +1192,8 @@
  m_pic-m_qpaRc[r] = 0;
  m_pic-m_rowEncodedBits[r] = 0;
  m_pic-m_numEncodedCusPerRow[r] = 0;
 +m_pic-m_rowDiagSatd[r] = 0;
 +m_pic-m_rowDiagIntraSatd[r] = 0;
  }

  m_bAllRowsStop = false;
 @@ -1248,6 +1250,13 @@
  x265_emms();
  double qp = baseQp;

 +/*clear cuCostsForVbv when vbv row reset is triggered. */
 +if (m_pic-m_cuCostsForVbv[cuAddr]  0 || 
 m_pic-m_intraCuCostsForVbv[cuAddr]  0)
 +{
 +m_pic-m_cuCostsForVbv[cuAddr] = 0;
 +m_pic-m_intraCuCostsForVbv[cuAddr] = 0;
 +}
 +

Can we just unconditionally clear these two values?

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] h265.h:Proposed replacement of HM's h265 classes

2014-04-15 Thread Steve Borho
On Fri, Apr 11, 2014 at 5:32 PM, dave dtyx...@gmail.com wrote:
 Below is my proposed replacement for many of the H265 classes and structures
 in TComSlice.h.

 All have at least a basic constructor but no methods, and most probably won't
 need additional methods that do much more than set fields, which can be added
 when needed.

 A few structures have non-H265 fields that at least in HM code appear to be
 useful for encoding frames.

 I have configured the classes so the Encoder class would have an instance of
 each H265 class so that when an object is part of multiple H265 structures,
 only one instance is needed.

 Where fields are taken from either the Encoder class or x265_param I have
 used references instead of copying the field or using a pointer.  I don't
 think this will create any concurrency issues since the fields shouldn't
 need to be changed once they are set but if there are then the references
 can be replaced with copies.

 I tried to set up constructors to replace init methods where applicable.

 I haven't started integrating it into x265; when everyone's happy with it I
 can start replacing HM's classes with these.

 The file is also attached to this email

Hi Dave,

I think the general approach is ok, and this will nicely remove about
half of the remaining set/get methods left in our code.

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] vbv: clear row diagonal Satd costs, cu Satd costs when vbv row reset is triggered

2014-04-15 Thread Aarthi Priya Thirumalai
On Tue, Apr 15, 2014 at 11:31 PM, Steve Borho st...@borho.org wrote:

 On Tue, Apr 15, 2014 at 11:34 AM,  aar...@multicorewareinc.com wrote:
  # HG changeset patch
  # User Aarthi Thirumalai
  # Date 1397579661 -19800
  #  Tue Apr 15 22:04:21 2014 +0530
  # Node ID bf48002755a3f5593732ca039ad38a3c799da808
  # Parent  0a95a6bb0f8e71d7a7d0f8e3803ec2878ad558fe
  vbv: clear row diagonal Satd costs, cu Satd costs when vbv row reset is
 triggered.
 
  diff -r 0a95a6bb0f8e -r bf48002755a3 source/encoder/frameencoder.cpp
  --- a/source/encoder/frameencoder.cpp   Tue Apr 15 12:34:36 2014 +0900
  +++ b/source/encoder/frameencoder.cpp   Tue Apr 15 22:04:21 2014 +0530
  @@ -1192,6 +1192,8 @@
   m_pic-m_qpaRc[r] = 0;
   m_pic-m_rowEncodedBits[r] = 0;
   m_pic-m_numEncodedCusPerRow[r] = 0;
  +m_pic-m_rowDiagSatd[r] = 0;
  +m_pic-m_rowDiagIntraSatd[r] = 0;
   }
 
   m_bAllRowsStop = false;
  @@ -1248,6 +1250,13 @@
   x265_emms();
   double qp = baseQp;
 
  +/*clear cuCostsForVbv when vbv row reset is triggered. */
  +if (m_pic-m_cuCostsForVbv[cuAddr]  0 ||
 m_pic-m_intraCuCostsForVbv[cuAddr]  0)
  +{
  +m_pic-m_cuCostsForVbv[cuAddr] = 0;
  +m_pic-m_intraCuCostsForVbv[cuAddr] = 0;
  +}
  +

 Can we just unconditionally clear these two values?

 Yes, we can remove the if condition and always clear them. They will
normally be 0 at this point unless the CUs are encoded again after a vbv row
reset, in which case we must clear them.


 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate

2014-04-15 Thread Steve Borho
On Tue, Apr 15, 2014 at 6:30 AM, Min Chen chenm...@163.com wrote:
 # HG changeset patch
 # User Min Chen chenm...@163.com
 # Date 1397561438 -28800
 # Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a
 # Parent  1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb
 cleanup: reduce data size and dependency on MotionEstimate

 diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800
 @@ -111,8 +111,6 @@
  m_rdCost  = rdCost;

  initTempBuff(cfg-param-internalCsp);
 -m_me.setSearchMethod(cfg-param-searchMethod);
 -m_me.setSubpelRefine(cfg-param-subpelRefine);

  /* When frame parallelism is active, only 'refLagPixels' of reference 
 frames will be guaranteed
   * available for motion reference.  See refLagRows in 
 FrameEncoder::compressCTURows() */
 @@ -2191,7 +2189,7 @@
  cu-getCUMvField(REF_PIC_LIST_1)-m_refIdx[m.absPartIdx] = 
 m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx;

  motionCompensation(cu, m_predTempYuv, REF_PIC_LIST_X, puIdx, true, 
 false);
 -uint32_t costCand = 
 m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), 
 m_predTempYuv.getStride());
 +uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, 
 m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());
  uint32_t bitsCand = mergeCand + 1;
  if (mergeCand == m_cfg-param-maxNumMergeCand - 1)
  {
 @@ -2314,7 +2312,7 @@
  cu-clipMv(mvCand);

  xPredInterLumaBlk(cu, cu-getSlice()-getRefPic(l, 
 ref)-getPicYuvRec(), partAddr, mvCand, roiWidth, roiHeight, m_predTempYuv);
 -uint32_t cost = 
 m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
 +uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, 
 m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
  cost = m_rdCost-calcRdSADCost(cost, MVP_IDX_BITS);

  if (bestCost  cost)
 @@ -2328,11 +2326,11 @@

  int merange = m_cfg-param-searchRange;
  xSetSearchRange(cu, mvp, merange, mvmin, mvmax);
 -int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, 
 mvmax, mvp, numMvc, mvc, merange, outmv);
 +int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, 
 mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg-param-searchMethod, 
 m_cfg-param-subpelRefine);

  /* Get total cost of partition, but only include MV bit cost 
 once */
 -bits += m_me.bitcost(outmv);
 -uint32_t cost = (satdCost - m_me.mvcost(outmv)) + 
 m_rdCost-getCost(bits);
 +bits += m_me.bitcost(outmv, mvp);
 +uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + 
 m_rdCost-getCost(bits);

  /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
  xCheckBestMVP(amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, 
 cost);
 @@ -2368,7 +2366,7 @@

  int partEnum = partitionFromSizes(roiWidth, roiHeight);
  primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, 
 m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32);
 -int satdCost = m_me.bufSATD(avg, roiWidth);
 +int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);

  bidirBits = list[0].bits + list[1].bits + listSelBits[2] - 
 (listSelBits[0] + listSelBits[1]);
  bidirCost = satdCost + m_rdCost-getCost(bidirBits);
 @@ -2397,17 +2395,15 @@
  intptr_t refStride = m_mref[0][0]-lumaStride;

  primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, 
 refStride, ref1, refStride, 32);
 -satdCost = m_me.bufSATD(avg, roiWidth);
 +satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);

  MV mvp0 = list[0].mvp;
  int mvpIdx0 = list[0].mvpIdx;
 -m_me.setMVP(mvp0);
 -uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + 
 m_me.bitcost(mvzero);
 +uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, 
 mvp0) + m_me.bitcost(mvzero, mvp0);

  MV mvp1 = list[1].mvp;
  int mvpIdx1 = list[1].mvpIdx;
 -m_me.setMVP(mvp1);
 -uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + 
 m_me.bitcost(mvzero);
 +uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, 
 mvp1) + m_me.bitcost(mvzero, mvp1);

  uint32_t cost = satdCost + m_rdCost-getCost(bits0) + 
 m_rdCost-getCost(bits1);

 @@ -2556,9 +2552,8 @@
  {
  assert(amvpInfo-m_mvCand[outMvpIdx] == mvPred);

 -m_me.setMVP(mvPred);
  int bestMvpIdx = outMvpIdx;
 -int mvBitsOrig = m_me.bitcost(mv) + MVP_IDX_BITS;
 +int mvBitsOrig = 

Re: [x265] ASM crash in r6706

2014-04-15 Thread Steve Borho
On Tue, Apr 15, 2014 at 11:25 AM, Robert Bacs
robert.b...@rapidsolution.ro wrote:
 I followed these steps to build x265:

 http://forum.videohelp.com/threads/357754-%5BHEVC%5D-x265-EXE-mingw-builds

 I'm using the following command line to test the CLI:

 x265.exe --input in.y4m --output out.hevc

 and below is the resulting call-stack:

 #0  0x0051995e in x265_quant_sse4 ()
 #1  0x00603ba7 in x265::TComTrQuant::getTUEntropyCodingParameters (cu=
 0x5c32d98, result=..., absPartIdx=96491840, log2TrSize=16,
 ttype=x265::TEXT_CHROMA)
 at E:/temp/x265_clean/source/Lib/TLibCommon/TComTrQuant.h:171
 #2  0x005b5da8 in x265::TComTrQuant::transformNxN (this=0x54ea800,
 cu=0x5c32d98, residual=0x5c8faa0, stride=32, coeff=0x5c05940, trSize=16,
 ttype=x265::TEXT_CHROMA, absPartIdx=0, lastPos=0x2def748,
 useTransformSkip=false, curUseRDOQ=true)
 at e:/temp/x265_clean/source/Lib/TLibCommon/TComTrQuant.cpp:349
 #3  0x0059c289 in x265::TEncSearch::xIntraCodingChromaBlk (this=0x54ea518,
 cu=0x5c32d98, trDepth=1, absPartIdx=0, absPartIdxStep=64, fencYuv=
 0x5c93d38, predYuv=0x5c82a58, resiYuv=0x5c8da00, outDist=@0x2defab4: 0,
 chromaId=1, bReusePred=false)
 at e:/temp/x265_clean/source/Lib/TLibEncoder/TEncSearch.cpp:579
 #4  0x0059f6a2 in x265::TEncSearch::xRecurIntraChromaCodingQT (
 this=0x54ea518, cu=0x5c32d98, trDepth=1, absPartIdx=0,
 fencYuv=0x5c93d38,
 predYuv=0x5c82a58, resiYuv=0x5c8da00, outDist=@0x2defab4: 0)
 at e:/temp/x265_clean/source/Lib/TLibEncoder/TEncSearch.cpp:1304
 #5  0x0059f7a6 in x265::TEncSearch::xRecurIntraChromaCodingQT (
 this=0x54ea518, cu=0x5c32d98, trDepth=0, absPartIdx=0,
 fencYuv=0x5c93d38,
 predYuv=0x5c82a58, resiYuv=0x5c8da00, outDist=@0x2defab4: 0)
 at e:/temp/x265_clean/source/Lib/TLibEncoder/TEncSearch.cpp:1323
 #6  0x005a in x265::TEncSearch::estIntraPredChromaQT (this=0x54ea518,

Is your source video publicly available?

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] ASM crash in r6706

2014-04-15 Thread Steve Borho
On Tue, Apr 15, 2014 at 2:07 PM, Robert Bacs
robert.b...@rapidsolution.ro wrote:
 On 4/15/2014 9:16 PM, Steve Borho wrote:

 Is your source video publicly available?

 My source was a y4m created with ffmpeg, but you can use this one
 http://media.xiph.org/video/derf/y4m/bowing_cif.y4m, I just tried and got a
 similar call-stack:


Thanks, can you paste the output log data as well?  That would tell me
the version of GCC and other build-specific info.

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] ASM crash in r6706

2014-04-15 Thread Robert Bacs

On 4/15/2014 10:24 PM, Steve Borho wrote:
Thanks, can you paste the output log data as well? That would tell me 
the version of GCC and other build-specific info. 


Below is the output log:

[New Thread 8776.0x1c7c]
y4m  [info]: 352x288 fps 3/1001 i420 sar 128:117 frames 0 - 299 of 300
[New Thread 8776.0x1ec0]
x265 [info]: HEVC encoder version 0.9+48-9f0f011294bd
x265 [info]: build info [Windows][GCC 4.8.2][32 bit] 8bpp
x265 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2
[New Thread 8776.0x1840]
[New Thread 8776.0x1a54]
[New Thread 8776.0x26f4]
[New Thread 8776.0x838]
[New Thread 8776.0x205c]
[New Thread 8776.0x25e4]
[New Thread 8776.0x1820]
[New Thread 8776.0x20dc]
x265 [info]: WPP streams / pool / frames : 5 / 8 / 3
x265 [info]: Main profile, Level-2 (Main tier)
x265 [info]: CU size : 64
x265 [info]: Max RQT depth inter / intra : 1 / 1
x265 [info]: ME / range / subpel / merge : hex / 57 / 2 / 2
x265 [info]: Keyframe min / max / scenecut   : 25 / 250 / 40
x265 [info]: Lookahead / bframes / badapt: 20 / 4 / 2
x265 [info]: b-pyramid / weightp / weightb / refs: 1 / 1 / 0 / 3
x265 [info]: Rate Control / AQ-Strength / CUTree : CRF-28.0 / 1.0 / 1
x265 [info]: tools: rect amp rd=3 lft sao-lcu sign-hide
[New Thread 8776.0x25b0]
[New Thread 8776.0x14c4]
[New Thread 8776.0x134c]

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 8776.0x1840]
0x0051995e in x265_quant_sse4 ()
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] ASM crash in r6706

2014-04-15 Thread Steve Borho
On Tue, Apr 15, 2014 at 2:41 PM, Robert Bacs
robert.b...@rapidsolution.ro wrote:
 On 4/15/2014 10:24 PM, Steve Borho wrote:

 Thanks, can you paste the output log data as well? That would tell me the
 version of GCC and other build-specific info.


 Below is the output log:

 [New Thread 8776.0x1c7c]
 y4m  [info]: 352x288 fps 3/1001 i420 sar 128:117 frames 0 - 299 of 300
 [New Thread 8776.0x1ec0]
 x265 [info]: HEVC encoder version 0.9+48-9f0f011294bd
 x265 [info]: build info [Windows][GCC 4.8.2][32 bit] 8bpp
 x265 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2

Thanks, this is all useful. It looks like perhaps a Win32 issue.  My
guess is that if you compiled for Win64 this problem would go away.
We should have this fixed soon, though.

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] ASM crash in r6706

2014-04-15 Thread Robert Bacs

On 4/15/2014 11:28 PM, Steve Borho wrote:
Thanks, this is all useful. It looks like perhaps a Win32 issue. My 
guess is that if you compiled for Win64 this problem would go away. We 
should have this fixed soon, though. 


yes, you're right...I just tried 64-bit version and it works fine. Thanks.

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] alignment DCT8's stack to 64-bytes to avoid crash and improvement cache performance

2014-04-15 Thread Min Chen
# HG changeset patch
# User Min Chen chenm...@163.com
# Date 1397616580 -28800
# Node ID 1a8b54ce0dfa8eba524c4cadc81939710054ae44
# Parent  1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb
alignment DCT8's stack to 64-bytes to avoid crash and improvement cache 
performance

diff -r 1cf67a7b362d -r 1a8b54ce0dfa source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asmMon Apr 14 21:26:37 2014 -0500
+++ b/source/common/x86/dct8.asmWed Apr 16 10:49:40 2014 +0800
@@ -834,8 +834,14 @@
 
 ret
 
-cglobal idct8, 3,7,8,0-16*mmsize
+cglobal idct8, 3,7,8 ;,0-16*mmsize
+; alignment stack to 64-bytes
 mov r5, rsp
+sub rsp, 16*mmsize + gprsize
+and rsp, ~(64-1)
+mov [rsp + 16*mmsize], r5
+mov r5, rsp
+
 lea r4, [tab_idct8_3]
 lea r6, [tab_dct4]
 
@@ -866,4 +872,7 @@
 
 callpatial_butterfly_inverse_internal_pass2
 
+; restore origin stack pointer
+mov rsp, [rsp + 16*mmsize]
+
 RET

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] ASM crash in r6706

2014-04-15 Thread Steve Borho
On Tue, Apr 15, 2014 at 9:31 PM, chen chenm...@163.com wrote:
 Did you define HAVE_ALIGNED_STACK=1 in your configure?
 The x86 version of quant uses the stack to store temporary data with movdqa.

 Could you give me your exe file, so I can check the actual assembly code?
 Thanks!

GCC builds always set HAVE_ALIGNED_STACK=1

If we need to disable this for Win32, it would not be hard.

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel