Re: [FFmpeg-devel] [PATCH 2/2] pixblockdsp: Use memcpy for get_pixels_16_c

2015-10-20 Thread Michael Niedermayer
On Sat, Oct 17, 2015 at 06:05:46PM -0700, Timothy Gu wrote:
> Before:
>   15543 decicycles in get_pixels, 4193214 runs,   1090 skips
> After:
>    5713 decicycles in get_pixels, 8387564 runs,   1044 skips
> ---
>  libavcodec/pixblockdsp.c          | 38 +++++++++++++++++++++++++++++++++-----
>  libavcodec/pixblockdsp_template.c | 40 ----------------------------------------
>  2 files changed, 33 insertions(+), 45 deletions(-)
>  delete mode 100644 libavcodec/pixblockdsp_template.c

breaks fate:

--- ./tests/ref/vsynth/vsynth1-dnxhd-720p-10bit 2015-10-17 19:12:46.567154035 +0200
+++ tests/data/fate/vsynth1-dnxhd-720p-10bit 2015-10-20 17:13:03.600463343 +0200
@@ -1,4 +1,4 @@
-f8c4b7aa165a80df2485d526161290a3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
+cb729072a15682440da8a443f922ac8c *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
 2293760 tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
-87f1f0e074466facd3a9922ecc8311db *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo
-stddev:    6.23 PSNR: 32.23 MAXDIFF:   64 bytes:  7603200/   760320
+17bc71b58b687ffe06c13b99a5e55ac3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo
+stddev:   32.92 PSNR: 17.78 MAXDIFF:  211 bytes:  7603200/   760320
Test vsynth1-dnxhd-720p-10bit failed. Look at tests/data/fate/vsynth1-dnxhd-720p-10bit.err for details.
make: *** [fate-vsynth1-dnxhd-720p-10bit] Error 1
make: *** Waiting for unfinished jobs....
--- ./tests/ref/vsynth/vsynth1-dnxhd-1080i-10bit 2015-10-17 19:12:46.567154035 +0200
+++ tests/data/fate/vsynth1-dnxhd-1080i-10bit   2015-10-20 17:13:03.836463348 +0200
@@ -1,4 +1,4 @@
-f562845d1848bf5d3e524b418b742e01 *tests/data/fate/vsynth1-dnxhd-1080i-10bit.mov
+90be2539a8ad5475cdda2421874ccfaa *tests/data/fate/vsynth1-dnxhd-1080i-10bit.mov
 4588391 tests/data/fate/vsynth1-dnxhd-1080i-10bit.mov
-31032fcb7e6af79daaac02288254c6d6 *tests/data/fate/vsynth1-dnxhd-1080i-10bit.out.rawvideo
-stddev:    5.69 PSNR: 33.02 MAXDIFF:   55 bytes:  7603200/   760320
+50a342f03d5b00a51b65804814c857ff *tests/data/fate/vsynth1-dnxhd-1080i-10bit.out.rawvideo
+stddev:   22.26 PSNR: 21.18 MAXDIFF:  233 bytes:  7603200/   760320
Test vsynth1-dnxhd-1080i-10bit failed. Look at tests/data/fate/vsynth1-dnxhd-1080i-10bit.err for details.
make: *** [fate-vsynth1-dnxhd-1080i-10bit] Error 1

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Republics decline into democracies and democracies degenerate into
despotisms. -- Aristotle


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] pixblockdsp: Use memcpy for get_pixels_16_c

2015-10-17 Thread Timothy Gu
Before:
  15543 decicycles in get_pixels, 4193214 runs,   1090 skips
After:
   5713 decicycles in get_pixels, 8387564 runs,   1044 skips
---
 libavcodec/pixblockdsp.c          | 38 +++++++++++++++++++++++++++++++++-----
 libavcodec/pixblockdsp_template.c | 40 ----------------------------------------
 2 files changed, 33 insertions(+), 45 deletions(-)
 delete mode 100644 libavcodec/pixblockdsp_template.c

diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index 322e1dd..fc21ea4 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -23,12 +23,40 @@
 #include "avcodec.h"
 #include "pixblockdsp.h"
 
-#define BIT_DEPTH 16
-#include "pixblockdsp_template.c"
-#undef BIT_DEPTH
+static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels,
+ptrdiff_t line_size)
+{
+line_size /= 2;
+
+memcpy(block, pixels, 2 * 8);
+memcpy(block + 1 * 8, pixels + 1 * line_size, 2 * 8);
+memcpy(block + 2 * 8, pixels + 2 * line_size, 2 * 8);
+memcpy(block + 3 * 8, pixels + 3 * line_size, 2 * 8);
+memcpy(block + 4 * 8, pixels + 4 * line_size, 2 * 8);
+memcpy(block + 5 * 8, pixels + 5 * line_size, 2 * 8);
+memcpy(block + 6 * 8, pixels + 6 * line_size, 2 * 8);
+memcpy(block + 7 * 8, pixels + 7 * line_size, 2 * 8);
+}
+
+static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
+   ptrdiff_t line_size)
+{
+int i;
 
-#define BIT_DEPTH 8
-#include "pixblockdsp_template.c"
+/* read the pixels */
+for (i = 0; i < 8; i++) {
+block[0] = pixels[0];
+block[1] = pixels[1];
+block[2] = pixels[2];
+block[3] = pixels[3];
+block[4] = pixels[4];
+block[5] = pixels[5];
+block[6] = pixels[6];
+block[7] = pixels[7];
+pixels  += line_size;
+block   += 8;
+}
+}
 
 static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
   const uint8_t *s2, int stride)
diff --git a/libavcodec/pixblockdsp_template.c b/libavcodec/pixblockdsp_template.c
deleted file mode 100644
index d1e9102..0000000
--- a/libavcodec/pixblockdsp_template.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "bit_depth_template.c"
-
-static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t 
*_pixels,
-  ptrdiff_t line_size)
-{
-const pixel *pixels = (const pixel *) _pixels;
-int i;
-
-/* read the pixels */
-for (i = 0; i < 8; i++) {
-block[0] = pixels[0];
-block[1] = pixels[1];
-block[2] = pixels[2];
-block[3] = pixels[3];
-block[4] = pixels[4];
-block[5] = pixels[5];
-block[6] = pixels[6];
-block[7] = pixels[7];
-pixels  += line_size / sizeof(pixel);
-block   += 8;
-}
-}
-- 
1.9.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel