Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops

2013-11-18 Thread chen
@@ -640,26 +621,9 @@
 width  >>= m_hChromaShift;
 height >>= m_vChromaShift;
 
-for (y = height - 1; y >= 0; y--)
-{
-for (x = width - 1; x >= 0; )
-{
-// note: chroma min width is 2
-dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
-dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
-x--;
-dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
-dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
-x--;
-}
-
-srcU0 += src0Stride;
-srcU1 += src1Stride;
-srcV0 += src0Stride;
-srcV1 += src1Stride;
-dstU  += dststride;
-dstV  += dststride;
-}
+int part = partitionFromSizes(width, height);
You use the chroma size to get the partition index; I think that is an error.
 
+primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, 
srcU1, src1Stride);
+primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, 
srcV1, src1Stride);
 }
 }
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops

2013-11-18 Thread Deepthi Nandakumar
Pushed. But next time, please organize your patches more clearly.

1. Add C primitive, if it does not exist.
2. Add the function pointer declarations and new primitive declarations to
EncoderPrimitives struct.
3. Add testbench code for primitives.
4. Add asm code.

Once all of the above patches have been reviewed, pushed, and tested on all
platforms, you can then integrate it with the actual encoder.




On Mon, Nov 18, 2013 at 3:23 PM, dnyanesh...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Dnyaneshwar G dnyanesh...@multicorewareinc.com
 # Date 1384768323 -19800
 #  Mon Nov 18 15:22:03 2013 +0530
 # Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1
 # Parent  ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0
 TComYuv::addAvg, primitive function for luma and chroma loops

 diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp
 --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530
 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530
 @@ -589,9 +589,7 @@

  void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t
 partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
  {
 -int x, y;
  uint32_t src0Stride, src1Stride, dststride;
 -int shiftNum, offset;

  int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
  int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
 @@ -605,61 +603,24 @@
  Pel* dstU = getCbAddr(partUnitIdx);
  Pel* dstV = getCrAddr(partUnitIdx);

 +int part = partitionFromSizes(width, height);
 +
  if (bLuma)
  {
  src0Stride = srcYuv0->m_width;
  src1Stride = srcYuv1->m_width;
  dststride  = getStride();
 -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
 -offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;

 -for (y = 0; y < height; y++)
 -{
 -for (x = 0; x < width; x += 4)
 -{
 -dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] +
 offset) >> shiftNum);
 -dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] +
 offset) >> shiftNum);
 -dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] +
 offset) >> shiftNum);
 -dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] +
 offset) >> shiftNum);
 -}
 -
 -srcY0 += src0Stride;
 -srcY1 += src1Stride;
 -dstY  += dststride;
 -}
 +primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride,
 srcY1, src1Stride);
  }
  if (bChroma)
  {
 -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
 -offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 -
  src0Stride = srcYuv0->m_cwidth;
  src1Stride = srcYuv1->m_cwidth;
  dststride  = getCStride();

 -width  >>= m_hChromaShift;
 -height >>= m_vChromaShift;
 -
 -for (y = height - 1; y >= 0; y--)
 -{
 -for (x = width - 1; x >= 0; )
 -{
 -// note: chroma min width is 2
 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >>
 shiftNum);
 -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >>
 shiftNum);
 -x--;
 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >>
 shiftNum);
 -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >>
 shiftNum);
 -x--;
 -}
 -
 -srcU0 += src0Stride;
 -srcU1 += src1Stride;
 -srcV0 += src0Stride;
 -srcV1 += src1Stride;
 -dstU  += dststride;
 -dstV  += dststride;
 -}
 +primitives.chroma_addAvg[part](dstU, dststride, srcU0,
 src0Stride, srcU1, src1Stride);
 +primitives.chroma_addAvg[part](dstV, dststride, srcV0,
 src0Stride, srcV1, src1Stride);
  }
  }

 diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp
 --- a/source/common/pixel.cpp   Mon Nov 18 12:26:44 2013 +0530
 +++ b/source/common/pixel.cpp   Mon Nov 18 15:22:03 2013 +0530
 @@ -794,6 +794,27 @@
  a += dstride;
  }
  }
 +
 +template<int bx, int by>
 +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t
 src0Stride, int16_t* src1, intptr_t src1Stride)
 +{
 +int shiftNum, offset;
 +shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
 +offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 +
 +for (int y = 0; y < by; y++)
 +{
 +for (int x = 0; x < bx; x += 2)
 +{
 +dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >>
 shiftNum);
 +dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >>
 shiftNum);
 +}
 +
 +src0 += src0Stride;
 +src1 += src1Stride;
 +dst  += dstStride;
 +}
 +}
  }  // end anonymous namespace

  namespace x265 {
 @@ -835,12 +856,14 @@
  p.satd[LUMA_16x64] = satd8<16, 64>;

  #define CHROMA(W, H) \
 +p.chroma_addAvg[CHROMA_ ## W ## x ## H]  = addAvg<W, H>; \
  p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ##