Re: [FFmpeg-devel] [PATCH] pixblockdsp: Use memcpy for get_pixels_16_c
On Tue, Oct 20, 2015 at 11:29 PM Michael Niedermayerwrote: > > diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c > > index 322e1dd..0f23d8a 100644 > > --- a/libavcodec/pixblockdsp.c > > +++ b/libavcodec/pixblockdsp.c > > @@ -23,12 +23,38 @@ > > #include "avcodec.h" > > #include "pixblockdsp.h" > > > > -#define BIT_DEPTH 16 > > -#include "pixblockdsp_template.c" > > -#undef BIT_DEPTH > > +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t > *pixels, > > +ptrdiff_t line_size) > > +{ > > +memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8); > > +memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8); > > +memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8); > > +memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8); > > +memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8); > > +memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8); > > +memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8); > > +memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8); > > AV_COPY128(U) > Changed. > > > > +} > > + > > +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t > *pixels, > > + ptrdiff_t line_size) > > +{ > > +int i; > > > > -#define BIT_DEPTH 8 > > -#include "pixblockdsp_template.c" > > +/* read the pixels */ > > +for (i = 0; i < 8; i++) { > > +block[0] = pixels[0]; > > +block[1] = pixels[1]; > > +block[2] = pixels[2]; > > +block[3] = pixels[3]; > > +block[4] = pixels[4]; > > +block[5] = pixels[5]; > > +block[6] = pixels[6]; > > +block[7] = pixels[7]; > > AV_COPY64(U) They are not equivalent. Pushed. Timothy ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] pixblockdsp: Use memcpy for get_pixels_16_c
On Tue, Oct 20, 2015 at 01:20:03PM -0700, Timothy Gu wrote: > Before: > 15543 decicycles in get_pixels, 4193214 runs, 1090 skips > After: >5713 decicycles in get_pixels, 8387564 runs, 1044 skips > --- > libavcodec/pixblockdsp.c | 36 ++- > libavcodec/pixblockdsp_template.c | 40 > --- > 2 files changed, 31 insertions(+), 45 deletions(-) > delete mode 100644 libavcodec/pixblockdsp_template.c > > diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c > index 322e1dd..0f23d8a 100644 > --- a/libavcodec/pixblockdsp.c > +++ b/libavcodec/pixblockdsp.c > @@ -23,12 +23,38 @@ > #include "avcodec.h" > #include "pixblockdsp.h" > > -#define BIT_DEPTH 16 > -#include "pixblockdsp_template.c" > -#undef BIT_DEPTH > +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t > *pixels, > +ptrdiff_t line_size) > +{ > +memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8); AV_COPY128(U) > +} > + > +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels, > + ptrdiff_t line_size) > +{ > +int i; > > -#define BIT_DEPTH 8 > -#include "pixblockdsp_template.c" > +/* read the pixels */ > +for (i = 0; i < 8; i++) { > +block[0] = pixels[0]; > +block[1] = pixels[1]; > +block[2] = pixels[2]; > +block[3] = pixels[3]; > +block[4] = pixels[4]; > +block[5] = pixels[5]; > +block[6] = pixels[6]; > +block[7] = pixels[7]; AV_COPY64(U) [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Dictatorship naturally arises out of democracy, and the most aggravated form of tyranny and slavery out of the most extreme liberty. -- Plato signature.asc Description: Digital signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] pixblockdsp: Use memcpy for get_pixels_16_c
On Tue, Oct 20, 2015 at 4:20 PM, Timothy Guwrote: > Before: > 15543 decicycles in get_pixels, 4193214 runs, 1090 skips > After: >5713 decicycles in get_pixels, 8387564 runs, 1044 skips > --- > libavcodec/pixblockdsp.c | 36 ++- > libavcodec/pixblockdsp_template.c | 40 > --- > 2 files changed, 31 insertions(+), 45 deletions(-) > delete mode 100644 libavcodec/pixblockdsp_template.c > > diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c > index 322e1dd..0f23d8a 100644 > --- a/libavcodec/pixblockdsp.c > +++ b/libavcodec/pixblockdsp.c > @@ -23,12 +23,38 @@ > #include "avcodec.h" > #include "pixblockdsp.h" > > -#define BIT_DEPTH 16 > -#include "pixblockdsp_template.c" > -#undef BIT_DEPTH > +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t > *pixels, > +ptrdiff_t line_size) > +{ > +memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8); > +memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8); > +} > + > +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels, > + ptrdiff_t line_size) > +{ > +int i; > > -#define BIT_DEPTH 8 > -#include "pixblockdsp_template.c" > +/* read the pixels */ > +for (i = 0; i < 8; i++) { > +block[0] = pixels[0]; > +block[1] = pixels[1]; > +block[2] = pixels[2]; > +block[3] = pixels[3]; > +block[4] = pixels[4]; > +block[5] = pixels[5]; > +block[6] = pixels[6]; > +block[7] = pixels[7]; > +pixels += line_size; > +block += 8; > +} > +} out of curiosity: do you get gains for get_pixels_8_c as well? What I am surprised to see is that even with the restrict keyword, a compiler does not optimize the assignments within the loop to some sort of block assignment (which I assume memcpy is doing). > > static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1, >const uint8_t *s2, int stride) > diff --git a/libavcodec/pixblockdsp_template.c > b/libavcodec/pixblockdsp_template.c > deleted file mode 100644 > index d1e9102..000 > --- a/libavcodec/pixblockdsp_template.c > +++ /dev/null > @@ -1,40 +0,0 @@ > -/* > - * This file is part of FFmpeg. > - * > - * FFmpeg is free software; you can redistribute it and/or > - * modify it under the terms of the GNU Lesser General Public > - * License as published by the Free Software Foundation; either > - * version 2.1 of the License, or (at your option) any later version. > - * > - * FFmpeg is distributed in the hope that it will be useful, > - * but WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - * Lesser General Public License for more details. > - * > - * You should have received a copy of the GNU Lesser General Public > - * License along with FFmpeg; if not, write to the Free Software > - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > - */ > - > -#include "bit_depth_template.c" > - > -static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t > *_pixels, > - ptrdiff_t line_size) > -{ > -const pixel *pixels = (const pixel *) _pixels; > -int i; > - > -/* read the pixels */ > -for (i = 0; i < 8; i++) { > -block[0] = pixels[0]; > -block[1] = pixels[1]; > -block[2] = pixels[2]; > -block[3] = pixels[3]; > -block[4] = pixels[4]; > -block[5] = pixels[5]; > -block[6] = pixels[6]; > -block[7] = pixels[7]; > -pixels += line_size / sizeof(pixel); > -block += 8; > -} > -} > -- > 1.9.1 > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] pixblockdsp: Use memcpy for get_pixels_16_c
Before: 15543 decicycles in get_pixels, 4193214 runs, 1090 skips After: 5713 decicycles in get_pixels, 8387564 runs, 1044 skips --- libavcodec/pixblockdsp.c | 36 ++- libavcodec/pixblockdsp_template.c | 40 --- 2 files changed, 31 insertions(+), 45 deletions(-) delete mode 100644 libavcodec/pixblockdsp_template.c diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c index 322e1dd..0f23d8a 100644 --- a/libavcodec/pixblockdsp.c +++ b/libavcodec/pixblockdsp.c @@ -23,12 +23,38 @@ #include "avcodec.h" #include "pixblockdsp.h" -#define BIT_DEPTH 16 -#include "pixblockdsp_template.c" -#undef BIT_DEPTH +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels, +ptrdiff_t line_size) +{ +memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8); +memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8); +memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8); +memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8); +memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8); +memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8); +memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8); +memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8); +} + +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels, + ptrdiff_t line_size) +{ +int i; -#define BIT_DEPTH 8 -#include "pixblockdsp_template.c" +/* read the pixels */ +for (i = 0; i < 8; i++) { +block[0] = pixels[0]; +block[1] = pixels[1]; +block[2] = pixels[2]; +block[3] = pixels[3]; +block[4] = pixels[4]; +block[5] = pixels[5]; +block[6] = pixels[6]; +block[7] = pixels[7]; +pixels += line_size; +block += 8; +} +} static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride) diff --git a/libavcodec/pixblockdsp_template.c b/libavcodec/pixblockdsp_template.c deleted file mode 100644 index d1e9102..000 --- a/libavcodec/pixblockdsp_template.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "bit_depth_template.c" - -static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels, - ptrdiff_t line_size) -{ -const pixel *pixels = (const pixel *) _pixels; -int i; - -/* read the pixels */ -for (i = 0; i < 8; i++) { -block[0] = pixels[0]; -block[1] = pixels[1]; -block[2] = pixels[2]; -block[3] = pixels[3]; -block[4] = pixels[4]; -block[5] = pixels[5]; -block[6] = pixels[6]; -block[7] = pixels[7]; -pixels += line_size / sizeof(pixel); -block += 8; -} -} -- 1.9.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel