On Fri, Oct 18, 2013 at 3:49 AM, <[email protected]> wrote:
> # HG changeset patch
> # User Dnyaneshwar Gorade <[email protected]>
> # Date 1382086085 -19800
> #      Fri Oct 18 14:18:05 2013 +0530
> # Node ID 6d9bd6b6209e45cb49da804b23ad78424914b323
> # Parent  d6d7187c5f4ea0978ebbddc1a559cea3712bf345
> added cvt32to16_shr_sse2 function to testbench.
> Speed up measured is almost 14x.
>

pushed with minor improvements, please review

> diff -r d6d7187c5f4e -r 6d9bd6b6209e source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp Fri Oct 18 00:42:36 2013 -0500
> +++ b/source/test/pixelharness.cpp Fri Oct 18 14:18:05 2013 +0530
> @@ -45,10 +45,12 @@
>      pbuf3 = (pixel*)X265_MALLOC(pixel, bufsize);
>      pbuf4 = (pixel*)X265_MALLOC(pixel, bufsize);
>
> +    ibuf1 = (int*)X265_MALLOC(int, bufsize);
> +
>      sbuf1 = (short*)X265_MALLOC(short, bufsize);
>      sbuf2 = (short*)X265_MALLOC(short, bufsize);
>
> -    if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2)
> +    if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2 ||
> !ibuf1)
>      {
>          fprintf(stderr, "malloc failed, unable to initiate tests!\n");
>          exit(1);
> @@ -63,6 +65,8 @@
>
>          sbuf1[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1;
> //max(SHORT_MIN, min(rand(), SHORT_MAX));
>          sbuf2[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1;
> //max(SHORT_MIN, min(rand(), SHORT_MAX));
> +
> +        ibuf1[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1;
>      }
>  }
>
> @@ -481,6 +485,22 @@
>      return true;
>  }
>
> +bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref,
> cvt32to16_shr_t opt)
> +{
> +    int shift = (rand() % 7 + 1);
> +
> +    ALIGN_VAR_16(short, ref_dest[64 * 64]);
> +    ALIGN_VAR_16(short, opt_dest[64 * 64]);
> +
> +    opt(opt_dest, ibuf1, STRIDE, shift, STRIDE);
> +    ref(ref_dest, ibuf1, STRIDE, shift, STRIDE);
> +
> +    if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(short)))
> +        return false;
> +
> +    return true;
> +}
> +
>  bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref,
> const EncoderPrimitives& opt)
>  {
>      if (opt.satd[part])
> @@ -615,6 +635,15 @@
>          }
>      }
>
> +    if (opt.cvt32to16_shr)
> +    {
> +        if (!check_cvt32to16_shr_t(ref.cvt32to16_shr, opt.cvt32to16_shr))
> +        {
> +            printf("cvt32to16 failed!\n");
> +            return false;
> +        }
> +    }
> +
>      if (opt.blockcpy_pp)
>      {
>          if (!check_block_copy(ref.blockcpy_pp, opt.blockcpy_pp))
> @@ -810,6 +839,12 @@
>          }
>      }
>
> +    if (opt.cvt32to16_shr)
> +    {
> +        printf("cvt32to16 conversion");
> +        REPORT_SPEEDUP(opt.cvt32to16_shr, ref.cvt32to16_shr, sbuf1,
> ibuf1, 64, 5, 64);
> +    }
> +
>      if (opt.blockcpy_pp)
>      {
>          printf("block cpy");
> diff -r d6d7187c5f4e -r 6d9bd6b6209e source/test/pixelharness.h
> --- a/source/test/pixelharness.h Fri Oct 18 00:42:36 2013 -0500
> +++ b/source/test/pixelharness.h Fri Oct 18 14:18:05 2013 +0530
> @@ -33,6 +33,8 @@
>
>      pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;
>
> +    int *ibuf1;
> +
>      short *sbuf1, *sbuf2;
>
>      bool check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt);
> @@ -52,6 +54,7 @@
>      bool check_pixeladd_ss(pixeladd_ss_t ref, pixeladd_ss_t opt);
>      bool check_pixeladd_pp(pixeladd_pp_t ref, pixeladd_pp_t opt);
>      bool check_downscale_t(downscale_t ref, downscale_t opt);
> +    bool check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt);
>
>  public:
>
> _______________________________________________
> x265-devel mailing list
> [email protected]
> https://mailman.videolan.org/listinfo/x265-devel
>

--
Steve Borho
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel
