This patch series is causing crashes in the encoder on my Mac, so I am discarding it for now.
On Nov 18, 2013, at 1:14 AM, [email protected] wrote: > # HG changeset patch > # User Dnyaneshwar Gorade <[email protected]> > # Date 1384758687 -19800 > # Mon Nov 18 12:41:27 2013 +0530 > # Node ID ee062baf96b18ab2ecd64a2e4219b2a5a3c09e5d > # Parent e2895ce7bbeb2c3d845fee2578758d0012fa2cb4 > TComYuv::addAvg, primitive function for luma and chroma loops > > diff -r e2895ce7bbeb -r ee062baf96b1 source/Lib/TLibCommon/TComYuv.cpp > --- a/source/Lib/TLibCommon/TComYuv.cpp Sun Nov 17 11:24:13 2013 -0600 > +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:41:27 2013 +0530 > @@ -589,9 +589,7 @@ > > void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t > partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) > { > - int x, y; > uint32_t src0Stride, src1Stride, dststride; > - int shiftNum, offset; > > int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx); > int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx); > @@ -610,29 +608,12 @@ > src0Stride = srcYuv0->m_width; > src1Stride = srcYuv1->m_width; > dststride = getStride(); > - shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; > - offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; > > - for (y = 0; y < height; y++) > - { > - for (x = 0; x < width; x += 4) > - { > - dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) > >> shiftNum); > - dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) > >> shiftNum); > - dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) > >> shiftNum); > - dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) > >> shiftNum); > - } > - > - srcY0 += src0Stride; > - srcY1 += src1Stride; > - dstY += dststride; > - } > + int part = partitionFromSizes(width, height); > + primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, > srcY1, src1Stride); > } > if (bChroma) > { > - shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; > - offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; > - > src0Stride = srcYuv0->m_cwidth; > src1Stride = 
srcYuv1->m_cwidth; > dststride = getCStride(); > @@ -640,26 +621,9 @@ > width >>= m_hChromaShift; > height >>= m_vChromaShift; > > - for (y = height - 1; y >= 0; y--) > - { > - for (x = width - 1; x >= 0; ) > - { > - // note: chroma min width is 2 > - dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); > - dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); > - x--; > - dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum); > - dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum); > - x--; > - } > - > - srcU0 += src0Stride; > - srcU1 += src1Stride; > - srcV0 += src0Stride; > - srcV1 += src1Stride; > - dstU += dststride; > - dstV += dststride; > - } > + int part = partitionFromSizes(width, height); > + primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, > srcU1, src1Stride); > + primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, > srcV1, src1Stride); > } > } > > diff -r e2895ce7bbeb -r ee062baf96b1 source/common/pixel.cpp > --- a/source/common/pixel.cpp Sun Nov 17 11:24:13 2013 -0600 > +++ b/source/common/pixel.cpp Mon Nov 18 12:41:27 2013 +0530 > @@ -794,6 +794,27 @@ > a += dstride; > } > } > + > +template<int bx, int by> > +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t > src0Stride, int16_t* src1, intptr_t src1Stride) > +{ > + int shiftNum, offset; > + shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; > + offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; > + > + for (int y = 0; y < by; y++) > + { > + for (int x = 0; x < bx; x += 2) > + { > + dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >> > shiftNum); > + dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >> > shiftNum); > + } > + > + src0 += src0Stride; > + src1 += src1Stride; > + dst += dstStride; > + } > +} > } // end anonymous namespace > > namespace x265 { > @@ -835,12 +856,14 @@ > p.satd[LUMA_16x64] = satd8<16, 64>; > > #define CHROMA(W, H) \ > + p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvg<W, H>; \ > 
p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, > H>; \ > p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \ > p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\ > p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; > > #define LUMA(W, H) \ > + p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \ > p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \ > p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \ > p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\ > diff -r e2895ce7bbeb -r ee062baf96b1 source/common/primitives.h > --- a/source/common/primitives.h Sun Nov 17 11:24:13 2013 -0600 > +++ b/source/common/primitives.h Mon Nov 18 12:41:27 2013 +0530 > @@ -219,6 +219,8 @@ > > typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, > pixel *src1, intptr_t sstride0, intptr_t sstride1); > > +typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0, > intptr_t src0Stride, int16_t* src1, intptr_t src1Stride); > + > /* Define a structure containing function pointers to optimized encoder > * primitives. Each pointer can reference either an assembly routine, > * a vectorized primitive, or a C function. */ > @@ -301,6 +303,9 @@ > var_t var[NUM_LUMA_PARTITIONS]; > ssim_4x4x2_core_t ssim_4x4x2_core; > plane_copy_deinterleave_t plane_copy_deinterleave_c; > + > + addAvg_t luma_addAvg[NUM_LUMA_PARTITIONS]; > + addAvg_t chroma_addAvg[NUM_CHROMA_PARTITIONS]; > }; > > /* This copy of the table is what gets used by the encoder. > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel
signature.asc
Description: Message signed with OpenPGP using GPGMail
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
