Re: [FFmpeg-devel] [PATCH] pixblockdsp: Use memcpy for get_pixels_16_c

2015-10-31 Thread Timothy Gu
On Tue, Oct 20, 2015 at 11:29 PM Michael Niedermayer 
wrote:

> > diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
> > index 322e1dd..0f23d8a 100644
> > --- a/libavcodec/pixblockdsp.c
> > +++ b/libavcodec/pixblockdsp.c
> > @@ -23,12 +23,38 @@
> >  #include "avcodec.h"
> >  #include "pixblockdsp.h"
> >
> > -#define BIT_DEPTH 16
> > -#include "pixblockdsp_template.c"
> > -#undef BIT_DEPTH
> > +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t
> *pixels,
> > +ptrdiff_t line_size)
> > +{
> > +memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8);
> > +memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8);
> > +memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8);
> > +memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8);
> > +memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8);
> > +memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8);
> > +memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8);
> > +memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8);
>
> AV_COPY128(U)
>
Changed.

>
>
> > +}
> > +
> > +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t
> *pixels,
> > +   ptrdiff_t line_size)
> > +{
> > +int i;
> >
> > -#define BIT_DEPTH 8
> > -#include "pixblockdsp_template.c"
> > +/* read the pixels */
> > +for (i = 0; i < 8; i++) {
> > +block[0] = pixels[0];
> > +block[1] = pixels[1];
> > +block[2] = pixels[2];
> > +block[3] = pixels[3];
> > +block[4] = pixels[4];
> > +block[5] = pixels[5];
> > +block[6] = pixels[6];
> > +block[7] = pixels[7];
>
> AV_COPY64(U)

They are not equivalent.

Pushed.

Timothy
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pixblockdsp: Use memcpy for get_pixels_16_c

2015-10-21 Thread Michael Niedermayer
On Tue, Oct 20, 2015 at 01:20:03PM -0700, Timothy Gu wrote:
> Before:
>   15543 decicycles in get_pixels, 4193214 runs,   1090 skips
> After:
>5713 decicycles in get_pixels, 8387564 runs,   1044 skips
> ---
>  libavcodec/pixblockdsp.c  | 36 ++-
>  libavcodec/pixblockdsp_template.c | 40 
> ---
>  2 files changed, 31 insertions(+), 45 deletions(-)
>  delete mode 100644 libavcodec/pixblockdsp_template.c
> 
> diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
> index 322e1dd..0f23d8a 100644
> --- a/libavcodec/pixblockdsp.c
> +++ b/libavcodec/pixblockdsp.c
> @@ -23,12 +23,38 @@
>  #include "avcodec.h"
>  #include "pixblockdsp.h"
>  
> -#define BIT_DEPTH 16
> -#include "pixblockdsp_template.c"
> -#undef BIT_DEPTH
> +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t 
> *pixels,
> +ptrdiff_t line_size)
> +{
> +memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8);

AV_COPY128(U)


> +}
> +
> +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
> +   ptrdiff_t line_size)
> +{
> +int i;
>  
> -#define BIT_DEPTH 8
> -#include "pixblockdsp_template.c"
> +/* read the pixels */
> +for (i = 0; i < 8; i++) {
> +block[0] = pixels[0];
> +block[1] = pixels[1];
> +block[2] = pixels[2];
> +block[3] = pixels[3];
> +block[4] = pixels[4];
> +block[5] = pixels[5];
> +block[6] = pixels[6];
> +block[7] = pixels[7];

AV_COPY64(U)

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Dictatorship naturally arises out of democracy, and the most aggravated
form of tyranny and slavery out of the most extreme liberty. -- Plato


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pixblockdsp: Use memcpy for get_pixels_16_c

2015-10-20 Thread Ganesh Ajjanagadde
On Tue, Oct 20, 2015 at 4:20 PM, Timothy Gu  wrote:
> Before:
>   15543 decicycles in get_pixels, 4193214 runs,   1090 skips
> After:
>5713 decicycles in get_pixels, 8387564 runs,   1044 skips
> ---
>  libavcodec/pixblockdsp.c  | 36 ++-
>  libavcodec/pixblockdsp_template.c | 40 
> ---
>  2 files changed, 31 insertions(+), 45 deletions(-)
>  delete mode 100644 libavcodec/pixblockdsp_template.c
>
> diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
> index 322e1dd..0f23d8a 100644
> --- a/libavcodec/pixblockdsp.c
> +++ b/libavcodec/pixblockdsp.c
> @@ -23,12 +23,38 @@
>  #include "avcodec.h"
>  #include "pixblockdsp.h"
>
> -#define BIT_DEPTH 16
> -#include "pixblockdsp_template.c"
> -#undef BIT_DEPTH
> +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t 
> *pixels,
> +ptrdiff_t line_size)
> +{
> +memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8);
> +memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8);
> +}
> +
> +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
> +   ptrdiff_t line_size)
> +{
> +int i;
>
> -#define BIT_DEPTH 8
> -#include "pixblockdsp_template.c"
> +/* read the pixels */
> +for (i = 0; i < 8; i++) {
> +block[0] = pixels[0];
> +block[1] = pixels[1];
> +block[2] = pixels[2];
> +block[3] = pixels[3];
> +block[4] = pixels[4];
> +block[5] = pixels[5];
> +block[6] = pixels[6];
> +block[7] = pixels[7];
> +pixels  += line_size;
> +block   += 8;
> +}
> +}

out of curiosity: do you get gains for get_pixels_8_c as well? What I
am surprised to see is that even with the restrict keyword, a compiler
does not optimize the assignments within the loop to some sort of
block assignment (which I assume memcpy is doing).

>
>  static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
>const uint8_t *s2, int stride)
> diff --git a/libavcodec/pixblockdsp_template.c 
> b/libavcodec/pixblockdsp_template.c
> deleted file mode 100644
> index d1e9102..000
> --- a/libavcodec/pixblockdsp_template.c
> +++ /dev/null
> @@ -1,40 +0,0 @@
> -/*
> - * This file is part of FFmpeg.
> - *
> - * FFmpeg is free software; you can redistribute it and/or
> - * modify it under the terms of the GNU Lesser General Public
> - * License as published by the Free Software Foundation; either
> - * version 2.1 of the License, or (at your option) any later version.
> - *
> - * FFmpeg is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> - * Lesser General Public License for more details.
> - *
> - * You should have received a copy of the GNU Lesser General Public
> - * License along with FFmpeg; if not, write to the Free Software
> - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> - */
> -
> -#include "bit_depth_template.c"
> -
> -static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t 
> *_pixels,
> -  ptrdiff_t line_size)
> -{
> -const pixel *pixels = (const pixel *) _pixels;
> -int i;
> -
> -/* read the pixels */
> -for (i = 0; i < 8; i++) {
> -block[0] = pixels[0];
> -block[1] = pixels[1];
> -block[2] = pixels[2];
> -block[3] = pixels[3];
> -block[4] = pixels[4];
> -block[5] = pixels[5];
> -block[6] = pixels[6];
> -block[7] = pixels[7];
> -pixels  += line_size / sizeof(pixel);
> -block   += 8;
> -}
> -}
> --
> 1.9.1
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] pixblockdsp: Use memcpy for get_pixels_16_c

2015-10-20 Thread Timothy Gu
Before:
  15543 decicycles in get_pixels, 4193214 runs,   1090 skips
After:
   5713 decicycles in get_pixels, 8387564 runs,   1044 skips
---
 libavcodec/pixblockdsp.c  | 36 ++-
 libavcodec/pixblockdsp_template.c | 40 ---
 2 files changed, 31 insertions(+), 45 deletions(-)
 delete mode 100644 libavcodec/pixblockdsp_template.c

diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index 322e1dd..0f23d8a 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -23,12 +23,38 @@
 #include "avcodec.h"
 #include "pixblockdsp.h"
 
-#define BIT_DEPTH 16
-#include "pixblockdsp_template.c"
-#undef BIT_DEPTH
+static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels,
+ptrdiff_t line_size)
+{
+memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8);
+memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8);
+memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8);
+memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8);
+memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8);
+memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8);
+memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8);
+memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8);
+}
+
+static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
+   ptrdiff_t line_size)
+{
+int i;
 
-#define BIT_DEPTH 8
-#include "pixblockdsp_template.c"
+/* read the pixels */
+for (i = 0; i < 8; i++) {
+block[0] = pixels[0];
+block[1] = pixels[1];
+block[2] = pixels[2];
+block[3] = pixels[3];
+block[4] = pixels[4];
+block[5] = pixels[5];
+block[6] = pixels[6];
+block[7] = pixels[7];
+pixels  += line_size;
+block   += 8;
+}
+}
 
 static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
   const uint8_t *s2, int stride)
diff --git a/libavcodec/pixblockdsp_template.c 
b/libavcodec/pixblockdsp_template.c
deleted file mode 100644
index d1e9102..000
--- a/libavcodec/pixblockdsp_template.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "bit_depth_template.c"
-
-static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t 
*_pixels,
-  ptrdiff_t line_size)
-{
-const pixel *pixels = (const pixel *) _pixels;
-int i;
-
-/* read the pixels */
-for (i = 0; i < 8; i++) {
-block[0] = pixels[0];
-block[1] = pixels[1];
-block[2] = pixels[2];
-block[3] = pixels[3];
-block[4] = pixels[4];
-block[5] = pixels[5];
-block[6] = pixels[6];
-block[7] = pixels[7];
-pixels  += line_size / sizeof(pixel);
-block   += 8;
-}
-}
-- 
1.9.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel