On Thu, Oct 17, 2013 at 10:32 AM, <[email protected]> wrote:
> # HG changeset patch > # User Dnyaneshwar Gorade <[email protected]> > # Date 1382023822 -19800 > # Thu Oct 17 21:00:22 2013 +0530 > # Node ID 4dbd17ef69db91b5604f9c5cc6a4a62f15b91ab0 > # Parent f6d04c660b9bb1b0cf6274faf514be77148aa312 > added cvt32to16_shr function to testbench. > > diff -r f6d04c660b9b -r 4dbd17ef69db source/Lib/TLibCommon/TComTrQuant.cpp > --- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Oct 17 20:34:48 2013 > +0530 > +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Thu Oct 17 21:00:22 2013 > +0530 > @@ -493,7 +493,7 @@ > transformSkipShift = shift; > for (j = 0; j < height; j++) > { > - primitives.cvt32to16_shr(&residual[j * stride], &coef[j * > width], shift, width); > + primitives.cvt32to16_shr(residual, coef, stride, shift, > width); > you should explain in the commit message why a stride argument is now required > } > } > else > diff -r f6d04c660b9b -r 4dbd17ef69db source/common/pixel.cpp > --- a/source/common/pixel.cpp Thu Oct 17 20:34:48 2013 +0530 > +++ b/source/common/pixel.cpp Thu Oct 17 21:00:22 2013 +0530 > @@ -439,13 +439,18 @@ > } > } > > -void convert32to16_shr(short *dst, int *src, int shift, int num) > +void convert32to16_shr(short *dst, int *src, intptr_t stride, int shift, > int size) > { > int round = 1 << (shift - 1); > > - for (int i = 0; i < num; i++) > + for (int i = 0; i < size; i++) > { > - dst[i] = (short)((src[i] + round) >> shift); > + for (int j = 0; j < size; j++) > + { > + dst[j] = (short)((src[j] + round) >> shift); > + } > + src += size; > + dst += stride; > } > } > > diff -r f6d04c660b9b -r 4dbd17ef69db source/common/primitives.h > --- a/source/common/primitives.h Thu Oct 17 20:34:48 2013 +0530 > +++ b/source/common/primitives.h Thu Oct 17 21:00:22 2013 +0530 > @@ -179,7 +179,7 @@ > > typedef void (*cvt16to32_shl_t)(int *dst, short *src, intptr_t, int, int); > typedef void (*cvt16to16_shl_t)(short *dst, short *src, int, int, > intptr_t, int); > -typedef void (*cvt32to16_shr_t)(short *dst, int *src, int, int); > 
+typedef void (*cvt32to16_shr_t)(short *dst, int *src, intptr_t, int, int); > > typedef void (*dct_t)(short *src, int *dst, intptr_t stride); > typedef void (*idct_t)(int *src, short *dst, intptr_t stride); > diff -r f6d04c660b9b -r 4dbd17ef69db source/common/vec/pixel-sse3.cpp > --- a/source/common/vec/pixel-sse3.cpp Thu Oct 17 20:34:48 2013 +0530 > +++ b/source/common/vec/pixel-sse3.cpp Thu Oct 17 21:00:22 2013 +0530 > @@ -31,23 +31,25 @@ > using namespace x265; > > namespace { > -void convert32to16_shr(short *dst, int *org, int shift, int num) > +void convert32to16_shr(short *dst, int *org, intptr_t stride, int shift, > int size) > { > - int i; > + int i, j; > __m128i round = _mm_set1_epi32(1 << (shift - 1)); > > - for (i = 0; i < num; i += 4) > + for (i = 0; i < size; i++) > { > - __m128i im32; > - __m128i im16; > - > - im32 = _mm_loadu_si128((__m128i const*)org); > - im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), > _mm_cvtsi32_si128(shift)); > - im16 = _mm_packs_epi32(im32, im32); > - _mm_storeu_si128((__m128i*)dst, im16); > - > - org += 4; > - dst += 4; > + for (j = 0; j < size; j += 4) > + { > + __m128i im32; > + __m128i im16; > + > + im32 = _mm_loadu_si128((__m128i const*)(org + j)); > + im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), > _mm_cvtsi32_si128(shift)); > + im16 = _mm_packs_epi32(im32, im32); > + _mm_storel_epi64((__m128i*)(dst + j), im16); > + } > + org += size; > + dst += stride; > } > } > > @@ -639,6 +641,7 @@ > //p.cvt32to16_shr = convert32to16_shr; > p.cvt16to32_shl = convert16to32_shl; > p.cvt16to16_shl = convert16to16_shl; > + p.cvt32to16_shr = convert32to16_shr; > > #if !HIGH_BIT_DEPTH > p.transpose[0] = transpose4; > diff -r f6d04c660b9b -r 4dbd17ef69db source/test/pixelharness.cpp > --- a/source/test/pixelharness.cpp Thu Oct 17 20:34:48 2013 +0530 > +++ b/source/test/pixelharness.cpp Thu Oct 17 21:00:22 2013 +0530 > @@ -494,6 +494,39 @@ > return true; > } > > +bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref, > 
cvt32to16_shr_t opt) > +{ > + int bufsize = STRIDE * STRIDE; > + int* src = (int*)X265_MALLOC(int, bufsize); > Does this really need a new buffer? Can you just use the test buffers already allocated? If not, then all this needs to be in the pixel harness constructor. > + > + int shift = (rand() % 7 + 1); > + > + if (!src) > + { > + fprintf(stderr, "malloc failed, unable to initiate tests!\n"); > + exit(1); > + } > + > + for (int i = 0; i < bufsize; i++) > + { > + src[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1; > + } > + > + ALIGN_VAR_16(short, ref_dest[64 * 64]); > + ALIGN_VAR_16(short, opt_dest[64 * 64]); > + > + memset(ref_dest, 0, 64 * 64 * sizeof(short)); > + memset(opt_dest, 0, 64 * 64 * sizeof(short)); > + > + opt(opt_dest, src, STRIDE, shift, STRIDE); > + ref(ref_dest, src, STRIDE, shift, STRIDE); > + > + if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(short))) > + return false; > white-space > + > + return true; > +} > + > bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt) > { > ALIGN_VAR_16(pixel, ref_dest[64 * 64]); > @@ -665,7 +698,14 @@ > } > } > } > - > + if(opt.cvt32to16_shr) > white-space: missing space after `if` > + { > + if (!check_cvt32to16_shr_t(ref.cvt32to16_shr, opt.cvt32to16_shr)) > + { > + printf("cvt32to16 failed!\n"); > + return false; > + } > + } > You also need to add this primitive to the function which compares performance between the optimized version and the C reference. > if (opt.blockcpy_pp) > { > if (!check_block_copy(ref.blockcpy_pp, opt.blockcpy_pp)) > diff -r f6d04c660b9b -r 4dbd17ef69db source/test/pixelharness.h > --- a/source/test/pixelharness.h Thu Oct 17 20:34:48 2013 +0530 > +++ b/source/test/pixelharness.h Thu Oct 17 21:00:22 2013 +0530 > @@ -53,6 +53,7 @@ > bool check_pixeladd_pp(pixeladd_pp_t ref, pixeladd_pp_t opt); > bool check_downscale_t(downscale_t ref, downscale_t opt); > bool check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt); > + bool check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt); > > public: > > 
_______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > -- Steve Borho
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
