On Tue, Oct 20, 2015 at 4:20 PM, Timothy Gu <timothyg...@gmail.com> wrote: > Before: > 15543 decicycles in get_pixels, 4193214 runs, 1090 skips > After: > 5713 decicycles in get_pixels, 8387564 runs, 1044 skips > --- > libavcodec/pixblockdsp.c | 36 ++++++++++++++++++++++++++++++----- > libavcodec/pixblockdsp_template.c | 40 > --------------------------------------- > 2 files changed, 31 insertions(+), 45 deletions(-) > delete mode 100644 libavcodec/pixblockdsp_template.c > > diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c > index 322e1dd..0f23d8a 100644 > --- a/libavcodec/pixblockdsp.c > +++ b/libavcodec/pixblockdsp.c > @@ -23,12 +23,38 @@ > #include "avcodec.h" > #include "pixblockdsp.h" > > -#define BIT_DEPTH 16 > -#include "pixblockdsp_template.c" > -#undef BIT_DEPTH > +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t > *pixels, > + ptrdiff_t line_size) > +{ > + memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8); > + memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8); > + memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8); > + memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8); > + memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8); > + memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8); > + memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8); > + memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8); > +} > + > +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels, > + ptrdiff_t line_size) > +{ > + int i; > > -#define BIT_DEPTH 8 > -#include "pixblockdsp_template.c" > + /* read the pixels */ > + for (i = 0; i < 8; i++) { > + block[0] = pixels[0]; > + block[1] = pixels[1]; > + block[2] = pixels[2]; > + block[3] = pixels[3]; > + block[4] = pixels[4]; > + block[5] = pixels[5]; > + block[6] = pixels[6]; > + block[7] = pixels[7]; > + pixels += line_size; > + block += 8; > + } > +}
out of curiosity: do you get gains for get_pixels_8_c as well? What I am surprised to see is that even with the restrict keyword, a compiler does not optimize the assignments within the loop to some sort of block assignment (which I assume memcpy is doing). > > static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1, > const uint8_t *s2, int stride) > diff --git a/libavcodec/pixblockdsp_template.c > b/libavcodec/pixblockdsp_template.c > deleted file mode 100644 > index d1e9102..0000000 > --- a/libavcodec/pixblockdsp_template.c > +++ /dev/null > @@ -1,40 +0,0 @@ > -/* > - * This file is part of FFmpeg. > - * > - * FFmpeg is free software; you can redistribute it and/or > - * modify it under the terms of the GNU Lesser General Public > - * License as published by the Free Software Foundation; either > - * version 2.1 of the License, or (at your option) any later version. > - * > - * FFmpeg is distributed in the hope that it will be useful, > - * but WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * Lesser General Public License for more details. > - * > - * You should have received a copy of the GNU Lesser General Public > - * License along with FFmpeg; if not, write to the Free Software > - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > - */ > - > -#include "bit_depth_template.c" > - > -static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t > *_pixels, > - ptrdiff_t line_size) > -{ > - const pixel *pixels = (const pixel *) _pixels; > - int i; > - > - /* read the pixels */ > - for (i = 0; i < 8; i++) { > - block[0] = pixels[0]; > - block[1] = pixels[1]; > - block[2] = pixels[2]; > - block[3] = pixels[3]; > - block[4] = pixels[4]; > - block[5] = pixels[5]; > - block[6] = pixels[6]; > - block[7] = pixels[7]; > - pixels += line_size / sizeof(pixel); > - block += 8; > - } > -} > -- > 1.9.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel