On Tue, Oct 15, 2013 at 10:52 AM, <prav...@multicorewareinc.com> wrote:
> # HG changeset patch > # User Praveen Tiwari > # Date 1381852323 -19800 > # Node ID 39fc3c36e1b1b5fcaa7a7f65ddd21a2ecba1fc06 > # Parent 1087f1f3bf5ab0a87023975458d0273be6908a98 > Added C primitive and unit test code for chroma filter > > diff -r 1087f1f3bf5a -r 39fc3c36e1b1 source/common/ipfilter.cpp > --- a/source/common/ipfilter.cpp Tue Oct 15 20:57:54 2013 +0530 > +++ b/source/common/ipfilter.cpp Tue Oct 15 21:22:03 2013 +0530 > @@ -3,6 +3,7 @@ > * > * Authors: Deepthi Devaki <deepthidev...@multicorewareinc.com>, > * Rajesh Paulraj <raj...@multicorewareinc.com> > + * Praveen Kumar Tiwari <prav...@multicorewareinc.com> > * > * This program is free software; you can redistribute it and/or modify > * it under the terms of the GNU General Public License as published by > @@ -442,6 +443,55 @@ > txt += stride; > } > } > + > +template<int N, int width> > +void interp_horiz_pp(pixel *src, intptr_t srcStride, pixel *dst, intptr_t > dstStride, int height, int coeffIdx) > +{ > + int cStride = 1; > + short const * coeff= g_chromaFilter[coeffIdx]; > + src -= (N / 2 - 1) * cStride; > + coeffIdx; > + int offset; > + short maxVal; > + int headRoom = IF_INTERNAL_PREC - X265_DEPTH; > + offset = (1 << (headRoom - 1)); > + maxVal = (1 << X265_DEPTH) - 1; > + > + int row, col; > + for (row = 0; row < height; row++) > + { > + for (col = 0; col < width; col++) > + { > + int sum; > + > + sum = src[col + 0 * cStride] * coeff[0]; > + sum += src[col + 1 * cStride] * coeff[1]; > + if (N >= 4) > + { > + sum += src[col + 2 * cStride] * coeff[2]; > + sum += src[col + 3 * cStride] * coeff[3]; > + } > the N>= 6 check seems out of place, unless we're going to instantiate a 7tap filter > + if (N >= 6) > + { > + sum += src[col + 4 * cStride] * coeff[4]; > + sum += src[col + 5 * cStride] * coeff[5]; > + } > + if (N == 8) > + { > + sum += src[col + 6 * cStride] * coeff[6]; > + sum += src[col + 7 * cStride] * coeff[7]; > + } > + short val = (short)(sum + offset) >> headRoom; > + > + if (val < 0) val = 0; > + if (val > maxVal) val = maxVal; > + dst[col] = (pixel)val; > + } > + > + src += srcStride; > + dst += dstStride; > + } > +} > } > > namespace x265 { > diff -r 1087f1f3bf5a -r 39fc3c36e1b1 source/test/ipfilterharness.cpp > --- a/source/test/ipfilterharness.cpp Tue Oct 15 20:57:54 2013 +0530 > +++ b/source/test/ipfilterharness.cpp Tue Oct 15 21:22:03 2013 +0530 > @@ -3,6 +3,7 @@ > * > * Authors: Deepthi Devaki <deepthidev...@multicorewareinc.com>, > * Rajesh Paulraj <raj...@multicorewareinc.com> > + * Praveen Kumar Tiwari <prav...@multicorewareinc.com> > * > * This program is free software; you can redistribute it and/or modify > * it under the terms of the GNU General Public License as published by > @@ -39,6 +40,18 @@ > "ipfilterV_pp<4>" > }; > > +const char* ChromaFilterPPNames[] = > +{ > + "interp_4tap_horiz_pp_w2", > + "interp_4tap_horiz_pp_w4", > + "interp_4tap_horiz_pp_w6", > + "interp_4tap_horiz_pp_w8", > + "interp_4tap_horiz_pp_w12", > + "interp_4tap_horiz_pp_w16", > + "interp_4tap_horiz_pp_w24", > + "interp_4tap_horiz_pp_w32" > +}; > the names should correspond with the chroma size enums, which only specify a width. This string table should be re-usable for more than just 4tap horizontal pixel to pixel interpolation. Each element should just be "W2" or something similar so it can be used as: printf("chroma_hpp[%s]: ", ChromaFilterName[w]); > + > IPFilterHarness::IPFilterHarness() > { > ipf_t_size = 200 * 200; > @@ -262,6 +275,47 @@ > return true; > } > > +bool IPFilterHarness::check_IPFilter_primitive(filter_pp_t ref, > filter_pp_t opt) > there needs to be chroma and luma versions of this function for the two filter lengths, or pass filter length as an argument > +{ > + int rand_height = rand() % 100; // Randomly generated > Height > I don't see a point to testing any sizes not used by the encoder; this just prevents possible optimizations in the primitive. Primitives that have fixed dimensions should be tested with those fixed dimensions used by the encoder. > + int rand_val, rand_srcStride, rand_dstStride, rand_coeffIdx; > + > + for (int i = 0; i <= 100; i++) > + { > + memset(IPF_vec_output_p, 0, ipf_t_size); // Initialize > output buffer to zero > + memset(IPF_C_output_p, 0, ipf_t_size); // Initialize > output buffer to zero > is memzero really necessary here? I don't think so + > + rand_coeffIdx = rand() % 8; // Random coeffIdex in > the filter > chroma coeff index should be 1, 2, or 3 > + rand_val = rand() % 4; // Random offset in > the filter > rand_val is unused > + rand_srcStride = rand() % 100; // Randomly generated > srcStride > + rand_dstStride = rand() % 100; // Randomly generated > dstStride > + > + if (rand_srcStride < 32) > + rand_srcStride = 32; > + > + if (rand_dstStride < 32) > + rand_dstStride = 32; > + > + opt(pixel_buff + 3 * rand_srcStride, > + rand_srcStride, > + IPF_vec_output_p, > + rand_dstStride, > + rand_height, rand_coeffIdx > + ); > + ref(pixel_buff + 3 * rand_srcStride, > + rand_srcStride, > + IPF_C_output_p, > + rand_dstStride, > + rand_height, rand_coeffIdx > + ); > + > + if (memcmp(IPF_vec_output_p, IPF_C_output_p, ipf_t_size)) > + return false; > + } > + > + return true; > +} > + > bool IPFilterHarness::testCorrectness(const EncoderPrimitives& ref, const > EncoderPrimitives& opt) > { > for (int value = 0; value < NUM_IPFILTER_P_P; value++) > @@ -318,6 +372,18 @@ > } > } > > + for (int value = 0; value < NUM_CHROMA_PARTITIONS; value++) > + { > + if (opt.chroma_hpp[value]) > + { > this should test known heights for each width > + if (!check_IPFilter_primitive(ref.chroma_hpp[value], > opt.chroma_hpp[value])) > + { > + printf("%s failed\n", ChromaFilterPPNames[value]); > + return false; > + } > + } > + } > + > return true; > } > > @@ -372,4 +438,14 @@ > REPORT_SPEEDUP(opt.ipfilter_s2p, ref.ipfilter_s2p, > short_buff, srcStride, IPF_vec_output_p, > dstStride, width, height); > } > + > + for (int value = 0; value < NUM_CHROMA_PARTITIONS; value++) > + { > this should measure performance at each height supported by each width > + if (opt.chroma_hpp[value]) > + { > + printf("%s\t", ChromaFilterPPNames[value]); > + REPORT_SPEEDUP(opt.chroma_hpp[value], ref.chroma_hpp[value], > + pixel_buff + 3 * srcStride, srcStride, > IPF_vec_output_p, dstStride, height, 1); > + } > + } > } > diff -r 1087f1f3bf5a -r 39fc3c36e1b1 source/test/ipfilterharness.h > --- a/source/test/ipfilterharness.h Tue Oct 15 20:57:54 2013 +0530 > +++ b/source/test/ipfilterharness.h Tue Oct 15 21:22:03 2013 +0530 > @@ -3,6 +3,7 @@ > * > * Authors: Deepthi Devaki <deepthidev...@multicorewareinc.com>, > * Rajesh Paulraj <raj...@multicorewareinc.com> > + * Praveen Kumar Tiwari <prav...@multicorewareinc.com> > * > * This program is free software; you can redistribute it and/or modify > * it under the terms of the GNU General Public License as published by > @@ -45,6 +46,7 @@ > bool check_IPFilter_primitive(ipfilter_sp_t ref, ipfilter_sp_t opt); > bool check_IPFilter_primitive(ipfilter_p2s_t ref, ipfilter_p2s_t opt); > bool check_IPFilter_primitive(ipfilter_s2p_t ref, ipfilter_s2p_t opt); > + bool check_IPFilter_primitive(filter_pp_t ref, filter_pp_t opt); > > public: > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > -- Steve Borho
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel