--- configure | 2 + doc/APIchanges | 3 + libavutil/Makefile | 3 + libavutil/pixelutils.c | 142 ++++++++++++++++++++++++++++++++++++ libavutil/pixelutils.h | 52 ++++++++++++++ libavutil/version.h | 2 +- libavutil/x86/Makefile | 4 ++ libavutil/x86/pixelutils.asm | 155 ++++++++++++++++++++++++++++++++++++++++ libavutil/x86/pixelutils.h | 26 +++++++ libavutil/x86/pixelutils_init.c | 58 +++++++++++++++ tests/fate/libavutil.mak | 5 ++ tests/ref/fate/pixelutils | 15 ++++ 12 files changed, 466 insertions(+), 1 deletion(-) create mode 100644 libavutil/pixelutils.c create mode 100644 libavutil/pixelutils.h create mode 100644 libavutil/x86/pixelutils.asm create mode 100644 libavutil/x86/pixelutils.h create mode 100644 libavutil/x86/pixelutils_init.c create mode 100644 tests/ref/fate/pixelutils
diff --git a/configure b/configure index 9c3af50..57edd1d 100755 --- a/configure +++ b/configure @@ -144,6 +144,7 @@ Component options: --disable-mdct disable MDCT code --disable-rdft disable RDFT code --disable-fft disable FFT code + --disable-pixelutils disable pixel utils in libavutil Hardware accelerators: --disable-dxva2 disable DXVA2 code [autodetect] @@ -1451,6 +1452,7 @@ SUBSYSTEM_LIST=" lsp lzo mdct + pixelutils network rdft " diff --git a/doc/APIchanges b/doc/APIchanges index abca377..69ca682 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,9 @@ libavutil: 2012-10-22 API changes, most recent first: +2014-08-02 - xxxxxxx - lavu 52.95.100 - pixelutils.h + Add pixelutils API with SAD functions + 2014-07-30 - ba3e331 - lavu 52.94.100 - frame.h Add av_frame_side_data_name() diff --git a/libavutil/Makefile b/libavutil/Makefile index 91751dc..d57a741 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -44,6 +44,7 @@ HEADERS = adler32.h \ opt.h \ parseutils.h \ pixdesc.h \ + pixelutils.h \ pixfmt.h \ random_seed.h \ replaygain.h \ @@ -113,6 +114,7 @@ OBJS = adler32.o \ opt.o \ parseutils.o \ pixdesc.o \ + pixelutils.o \ random_seed.o \ rational.o \ rc4.o \ @@ -170,6 +172,7 @@ TESTPROGS = adler32 \ pca \ parseutils \ pixdesc \ + pixelutils \ random_seed \ rational \ ripemd \ diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c new file mode 100644 index 0000000..278aa80 --- /dev/null +++ b/libavutil/pixelutils.c @@ -0,0 +1,142 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "common.h" +#include "pixelutils.h" + +#if CONFIG_PIXELUTILS + +#include "x86/pixelutils.h" + +static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2, + int w, int h) +{ + int x, y, sum = 0; + + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) + sum += abs(src1[x] - src2[x]); + src1 += stride1; + src2 += stride2; + } + return sum; +} + +#define DECLARE_BLOCK_FUNCTIONS(size) \ +static int block_sad_##size##x##size##_c(const uint8_t *src1, ptrdiff_t stride1, \ + const uint8_t *src2, ptrdiff_t stride2) \ +{ \ + return sad_wxh(src1, stride1, src2, stride2, size, size); \ +} + +DECLARE_BLOCK_FUNCTIONS(2) +DECLARE_BLOCK_FUNCTIONS(4) +DECLARE_BLOCK_FUNCTIONS(8) +DECLARE_BLOCK_FUNCTIONS(16) + +static const av_pixelutils_sad_fn sad_c[] = { + block_sad_2x2_c, + block_sad_4x4_c, + block_sad_8x8_c, + block_sad_16x16_c, +}; + +#endif /* CONFIG_PIXELUTILS */ + +av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligned, void *log_ctx) +{ +#if !CONFIG_PIXELUTILS + av_log(log_ctx, AV_LOG_ERROR, "pixelutils support is required " + "but libavutil is not compiled with it\n"); + return NULL; +#else + av_pixelutils_sad_fn sad[FF_ARRAY_ELEMS(sad_c)]; + + memcpy(sad, sad_c, sizeof(sad)); + + if (w_bits < 1 || w_bits > FF_ARRAY_ELEMS(sad) || + h_bits < 1 || h_bits > FF_ARRAY_ELEMS(sad)) + return NULL; + if (w_bits != h_bits) // only squared sad for now + return NULL; + +#if ARCH_X86 + ff_pixelutils_sad_init_x86(sad, aligned); +#endif + + return sad[w_bits - 1]; +#endif +} + +#ifdef TEST +#define W1 320 +#define H1 240 +#define W2 640 +#define H2 480 +int main(void) +{ + int i, a, ret = 0; + DECLARE_ALIGNED(32, uint32_t, buf1)[W1*H1]; + DECLARE_ALIGNED(32, uint32_t, buf2)[W2*H2]; + uint32_t state = 0; + + for (i = 0; i < W1*H1; i++) { + buf1[i] = state; + state = state * 1664525 + 1013904223; + } + + for (i = 0; i < W2*H2; i++) { + buf2[i] = state; + state = state * 1664525 + 1013904223; + } + + for (a = 0; a < 3; a++) { + const uint8_t *block1 = (const uint8_t *)buf1; + const uint8_t *block2 = (const uint8_t *)buf2; + + switch (a) { + case 0: + printf(":: src1 unaligned | src2 unaligned\n"); + block1++; + block2++; + break; + case 1: + printf(":: src1 aligned | src2 unaligned\n"); + block2++; + break; + case 2: + printf(":: src1 aligned | src2 aligned\n"); + break; + } + for (i = 1; i <= FF_ARRAY_ELEMS(sad_c); i++) { + av_pixelutils_sad_fn f_ref = sad_c[i - 1]; + av_pixelutils_sad_fn f_out = av_pixelutils_get_sad_fn(i, i, a, NULL); + const int out = f_out(block1, W1, block2, W2); + const int ref = f_ref(block1, W1, block2, W2); + printf("[%s] SAD %dx%d=%d ref=%d\n", + out == ref ? "OK" : "FAIL", 1<<i, 1<<i, out, ref); + if (out != ref) + ret = 1; + } + } + + return ret; +} +#endif /* TEST */ diff --git a/libavutil/pixelutils.h b/libavutil/pixelutils.h new file mode 100644 index 0000000..3c0fb16 --- /dev/null +++ b/libavutil/pixelutils.h @@ -0,0 +1,52 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_PIXELUTILS_H +#define AVUTIL_PIXELUTILS_H + +#include <stddef.h> +#include <stdint.h> +#include "common.h" + +/** + * Sum of abs(src1[x] - src2[x]) + */ +typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +/** + * Get a potentially optimized pointer to a Sum-of-absolute-differences + * function (see the av_pixelutils_sad_fn prototype). + * + * @param w_bits 1<<w_bits is the requested width of the block size + * @param h_bits 1<<h_bits is the requested height of the block size + * @param aligned If set to 2, the returned sad function will assume src1 and + * src2 addresses are aligned on the block size. + * If set to 1, the returned sad function will assume src2 is + * aligned on the block size. + * If set to 0, the returned sad function assume no particular + * alignment. + * @param log_ctx context used for logging, can be NULL + * + * @return a pointer to the SAD function or NULL in case of error (because of + * invalid parameters) + */ +av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, + int aligned, void *log_ctx); + +#endif /* AVUTIL_PIXELUTILS_H */ diff --git a/libavutil/version.h b/libavutil/version.h index 42e2db5..9af8f5f 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -56,7 +56,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 52 -#define LIBAVUTIL_VERSION_MINOR 94 +#define LIBAVUTIL_VERSION_MINOR 95 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile index 1e19082..ad3bdfc 100644 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@ -2,7 +2,11 @@ OBJS += x86/cpu.o \ x86/float_dsp_init.o \ x86/lls_init.o \ +OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \ + YASM-OBJS += x86/cpuid.o \ x86/emms.o \ x86/float_dsp.o \ x86/lls.o \ + +YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \ diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm new file mode 100644 index 0000000..8ab0a18 --- /dev/null +++ b/libavutil/x86/pixelutils.asm @@ -0,0 +1,155 @@ +;****************************************************************************** +;* Pixel utilities SIMD +;* +;* Copyright (C) 2002-2004 Michael Niedermayer <michae...@gmx.at> +;* Copyright (C) 2014 Clément Bœsch <u pkh me> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86util.asm" + +SECTION_TEXT + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmx +cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 + pxor m7, m7 + pxor m6, m6 +%rep 4 + mova m0, [src1q] + mova m2, [src1q + stride1q] + mova m1, [src2q] + mova m3, [src2q + stride2q] + psubusb m4, m0, m1 + psubusb m5, m2, m3 + psubusb m1, m0 + psubusb m3, m2 + por m1, m4 + por m3, m5 + punpcklbw m0, m1, m7 + punpcklbw m2, m3, m7 + punpckhbw m1, m7 + punpckhbw m3, m7 + paddw m0, m1 + paddw m2, m3 + paddw m0, m2 + paddw m6, m0 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + psrlq m0, m6, 32 + paddw m6, m0 + psrlq m0, m6, 16 + paddw m6, m0 + movd eax, m6 + movzx eax, ax + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 + pxor m2, m2 +%rep 4 + mova m0, [src1q] + mova m1, [src1q + stride1q] + psadbw m0, [src2q] + psadbw m1, [src2q + stride2q] + paddw m2, m0 + paddw m2, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movd eax, m2 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2 + pxor m2, m2 +%rep 16 + mova m0, [src1q] + mova m1, [src1q + 8] + psadbw m0, [src2q] + psadbw m1, [src2q + 8] + paddw m2, m0 + paddw m2, m1 + add src1q, stride1q + add src2q, stride2q +%endrep + movd eax, m2 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_16x16_sse(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2 + pxor m4, m4 +%rep 8 + movu m0, [src1q] + movu m1, [src1q + stride1q] + movu m2, [src2q] + movu m3, [src2q + stride2q] + psadbw m0, m2 + psadbw m1, m3 + paddw m4, m0 + paddw m4, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movhlps m0, m4 + paddw m4, m0 + movd eax, m4 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_[au]_16x16_sse(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +%macro SAD_XMM_16x16 1 +INIT_XMM sse2 +cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2 + pxor m2, m2 +%rep 8 + mov%1 m0, [src2q] + mov%1 m1, [src2q + stride2q] + psadbw m0, [src1q] + psadbw m1, [src1q + stride1q] + paddw m2, m0 + paddw m2, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movhlps m0, m2 + paddw m2, m0 + movd eax, m2 + RET +%endmacro + +SAD_XMM_16x16 a +SAD_XMM_16x16 u diff --git a/libavutil/x86/pixelutils.h b/libavutil/x86/pixelutils.h new file mode 100644 index 0000000..876cf46 --- /dev/null +++ b/libavutil/x86/pixelutils.h @@ -0,0 +1,26 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_X86_PIXELUTILS_H +#define AVUTIL_X86_PIXELUTILS_H + +#include "libavutil/pixelutils.h" + +void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned); + +#endif /* AVUTIL_X86_PIXELUTILS_H */ diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c new file mode 100644 index 0000000..d600510 --- /dev/null +++ b/libavutil/x86/pixelutils_init.c @@ -0,0 +1,58 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "pixelutils.h" +#include "cpu.h" + +int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) +{ + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) { + sad[2] = ff_pixelutils_sad_8x8_mmx; + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + sad[2] = ff_pixelutils_sad_8x8_mmxext; + sad[3] = ff_pixelutils_sad_16x16_mmxext; + } + + if (EXTERNAL_SSE2(cpu_flags)) { + switch (aligned) { + case 0: sad[3] = ff_pixelutils_sad_16x16_sse2; break; // src1 unaligned, src2 unaligned + case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1 aligned, src2 unaligned + case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned + } + } +} diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak index 526ec16..68af50f 100644 --- a/tests/fate/libavutil.mak +++ b/tests/fate/libavutil.mak @@ -74,6 +74,10 @@ FATE_LIBAVUTIL += fate-parseutils fate-parseutils: libavutil/parseutils-test$(EXESUF) fate-parseutils: CMD = run libavutil/parseutils-test +FATE_LIBAVUTIL-$(CONFIG_PIXELUTILS) += fate-pixelutils +fate-pixelutils: libavutil/pixelutils-test$(EXESUF) +fate-pixelutils: CMD = run libavutil/pixelutils-test + FATE_LIBAVUTIL += fate-random_seed fate-random_seed: libavutil/random_seed-test$(EXESUF) fate-random_seed: CMD = run libavutil/random_seed-test @@ -99,5 +103,6 @@ FATE_LIBAVUTIL += fate-xtea fate-xtea: libavutil/xtea-test$(EXESUF) fate-xtea: CMD = run libavutil/xtea-test +FATE_LIBAVUTIL += $(FATE_LIBAVUTIL-yes) FATE-$(CONFIG_AVUTIL) += $(FATE_LIBAVUTIL) fate-libavutil: $(FATE_LIBAVUTIL) diff --git a/tests/ref/fate/pixelutils b/tests/ref/fate/pixelutils new file mode 100644 index 0000000..308285c --- /dev/null +++ b/tests/ref/fate/pixelutils @@ -0,0 +1,15 @@ +:: src1 unaligned | src2 unaligned +[OK] SAD 2x2=436 ref=436 +[OK] SAD 4x4=1232 ref=1232 +[OK] SAD 8x8=4906 ref=4906 +[OK] SAD 16x16=21601 ref=21601 +:: src1 aligned | src2 unaligned +[OK] SAD 2x2=280 ref=280 +[OK] SAD 4x4=1724 ref=1724 +[OK] SAD 8x8=5988 ref=5988 +[OK] SAD 16x16=22625 ref=22625 +:: src1 aligned | src2 aligned +[OK] SAD 2x2=366 ref=366 +[OK] SAD 4x4=1264 ref=1264 +[OK] SAD 8x8=4874 ref=4874 +[OK] SAD 16x16=21217 ref=21217 -- 2.0.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel