Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops
@@ -640,26 +621,9 @@ width >>= m_hChromaShift; height >>= m_vChromaShift; -for (y = height - 1; y >= 0; y--) -{ -for (x = width - 1; x >= 0; ) -{ -// note: chroma min width is 2 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); -x--; -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); -x--; -} - -srcU0 += src0Stride; -srcU1 += src1Stride; -srcV0 += src0Stride; -srcV1 += src1Stride; -dstU += dststride; -dstV += dststride; -} +int part = partitionFromSizes(width, height); You use the chroma size to get the index here; I think this is an error. +primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride); +primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride); } } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops
# HG changeset patch # User Dnyaneshwar G <dnyanesh...@multicorewareinc.com> # Date 1384768323 -19800 # Mon Nov 18 15:22:03 2013 +0530 # Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1 # Parent ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0 TComYuv::addAvg, primitive function for luma and chroma loops diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -589,9 +589,7 @@ void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) { -int x, y; uint32_t src0Stride, src1Stride, dststride; -int shiftNum, offset; int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx); int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx); @@ -605,61 +603,24 @@ Pel* dstU = getCbAddr(partUnitIdx); Pel* dstV = getCrAddr(partUnitIdx); +int part = partitionFromSizes(width, height); + if (bLuma) { src0Stride = srcYuv0->m_width; src1Stride = srcYuv1->m_width; dststride = getStride(); -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; -for (y = 0; y < height; y++) -{ -for (x = 0; x < width; x += 4) -{ -dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum); -dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum); -dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum); -dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) >> shiftNum); -} - -srcY0 += src0Stride; -srcY1 += src1Stride; -dstY += dststride; -} +primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride); } if (bChroma) { -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; - src0Stride = srcYuv0->m_cwidth; src1Stride = srcYuv1->m_cwidth; dststride = getCStride(); -width >>= m_hChromaShift; -height >>= m_vChromaShift; - -for (y = height - 1; y >= 0; y--) -{ -for 
(x = width - 1; x >= 0; ) -{ -// note: chroma min width is 2 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); -x--; -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); -x--; -} - -srcU0 += src0Stride; -srcU1 += src1Stride; -srcV0 += src0Stride; -srcV1 += src1Stride; -dstU += dststride; -dstV += dststride; -} +primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride); +primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride); } } diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -794,6 +794,27 @@ a += dstride; } } + +template<int bx, int by> +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride) +{ +int shiftNum, offset; +shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; +offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; + +for (int y = 0; y < by; y++) +{ +for (int x = 0; x < bx; x += 2) +{ +dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >> shiftNum); +dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >> shiftNum); +} + +src0 += src0Stride; +src1 += src1Stride; +dst += dstStride; +} +} } // end anonymous namespace namespace x265 { @@ -835,12 +856,14 @@ p.satd[LUMA_16x64] = satd8<16, 64>; #define CHROMA(W, H) \ +p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvg<W, H>; \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \ p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \ p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; #define LUMA(W, H) \ +p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \ p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \ p.luma_copy_sp[LUMA_ ## W 
## x ## H] = blockcopy_sp_c<W, H>; \ p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\ diff -r ac9e64d8a80b -r cdd54aa200bd source/common/primitives.h ---
Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops
Pushed. But next time, please organize your patches more clearly. 1. Add C primitive, if it does not exist. 2. Add the function pointer declarations and new primitive declarations to EncoderPrimitives struct. 3. Add testbench code for primitives. 4. Add asm code. Once all above patches have been reviewed, pushed and tested on all platforms, then you can integrate it with the actual encoder. On Mon, Nov 18, 2013 at 3:23 PM, <dnyanesh...@multicorewareinc.com> wrote: # HG changeset patch # User Dnyaneshwar G <dnyanesh...@multicorewareinc.com> # Date 1384768323 -19800 # Mon Nov 18 15:22:03 2013 +0530 # Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1 # Parent ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0 TComYuv::addAvg, primitive function for luma and chroma loops diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -589,9 +589,7 @@ void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) { -int x, y; uint32_t src0Stride, src1Stride, dststride; -int shiftNum, offset; int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx); int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx); @@ -605,61 +603,24 @@ Pel* dstU = getCbAddr(partUnitIdx); Pel* dstV = getCrAddr(partUnitIdx); +int part = partitionFromSizes(width, height); + if (bLuma) { src0Stride = srcYuv0->m_width; src1Stride = srcYuv1->m_width; dststride = getStride(); -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; -for (y = 0; y < height; y++) -{ -for (x = 0; x < width; x += 4) -{ -dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum); -dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum); -dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum); -dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) >>
shiftNum); -} - -srcY0 += src0Stride; -srcY1 += src1Stride; -dstY += dststride; -} +primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride); } if (bChroma) { -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; - src0Stride = srcYuv0->m_cwidth; src1Stride = srcYuv1->m_cwidth; dststride = getCStride(); -width >>= m_hChromaShift; -height >>= m_vChromaShift; - -for (y = height - 1; y >= 0; y--) -{ -for (x = width - 1; x >= 0; ) -{ -// note: chroma min width is 2 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); -x--; -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); -x--; -} - -srcU0 += src0Stride; -srcU1 += src1Stride; -srcV0 += src0Stride; -srcV1 += src1Stride; -dstU += dststride; -dstV += dststride; -} +primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride); +primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride); } } diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -794,6 +794,27 @@ a += dstride; } } + +template<int bx, int by> +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride) +{ +int shiftNum, offset; +shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; +offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; + +for (int y = 0; y < by; y++) +{ +for (int x = 0; x < bx; x += 2) +{ +dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >> shiftNum); +dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >> shiftNum); +} + +src0 += src0Stride; +src1 += src1Stride; +dst += dstStride; +} +} } // end anonymous namespace namespace x265 { @@ -835,12 +856,14 @@ p.satd[LUMA_16x64] = satd8<16, 64>; #define CHROMA(W, H) \ 
+p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvg<W, H>; \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ##
[x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops
# HG changeset patch # User Dnyaneshwar Gorade <dnyanesh...@multicorewareinc.com> # Date 1384758687 -19800 # Mon Nov 18 12:41:27 2013 +0530 # Node ID ee062baf96b18ab2ecd64a2e4219b2a5a3c09e5d # Parent e2895ce7bbeb2c3d845fee2578758d0012fa2cb4 TComYuv::addAvg, primitive function for luma and chroma loops diff -r e2895ce7bbeb -r ee062baf96b1 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Sun Nov 17 11:24:13 2013 -0600 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:41:27 2013 +0530 @@ -589,9 +589,7 @@ void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) { -int x, y; uint32_t src0Stride, src1Stride, dststride; -int shiftNum, offset; int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx); int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx); @@ -610,29 +608,12 @@ src0Stride = srcYuv0->m_width; src1Stride = srcYuv1->m_width; dststride = getStride(); -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; -for (y = 0; y < height; y++) -{ -for (x = 0; x < width; x += 4) -{ -dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum); -dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum); -dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum); -dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) >> shiftNum); -} - -srcY0 += src0Stride; -srcY1 += src1Stride; -dstY += dststride; -} +int part = partitionFromSizes(width, height); +primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride); } if (bChroma) { -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; - src0Stride = srcYuv0->m_cwidth; src1Stride = srcYuv1->m_cwidth; dststride = getCStride(); @@ -640,26 +621,9 @@ width >>= m_hChromaShift; height >>= m_vChromaShift; -for (y = height - 1; y >= 0; y--) -{ -for (x = width - 1; x >= 0; ) -{ -// note: chroma min width is 2 
-dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); -x--; -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); -x--; -} - -srcU0 += src0Stride; -srcU1 += src1Stride; -srcV0 += src0Stride; -srcV1 += src1Stride; -dstU += dststride; -dstV += dststride; -} +int part = partitionFromSizes(width, height); +primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride); +primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride); } } diff -r e2895ce7bbeb -r ee062baf96b1 source/common/pixel.cpp --- a/source/common/pixel.cpp Sun Nov 17 11:24:13 2013 -0600 +++ b/source/common/pixel.cpp Mon Nov 18 12:41:27 2013 +0530 @@ -794,6 +794,27 @@ a += dstride; } } + +template<int bx, int by> +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride) +{ +int shiftNum, offset; +shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; +offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; + +for (int y = 0; y < by; y++) +{ +for (int x = 0; x < bx; x += 2) +{ +dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >> shiftNum); +dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >> shiftNum); +} + +src0 += src0Stride; +src1 += src1Stride; +dst += dstStride; +} +} } // end anonymous namespace namespace x265 { @@ -835,12 +856,14 @@ p.satd[LUMA_16x64] = satd8<16, 64>; #define CHROMA(W, H) \ +p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvg<W, H>; \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \ p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \ p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; #define LUMA(W, H) \ +p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \ p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \ p.luma_copy_sp[LUMA_ ## W ## x ## H] = 
blockcopy_sp_c<W, H>; \ p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\ diff -r e2895ce7bbeb -r ee062baf96b1 source/common/primitives.h --- a/source/common/primitives.h