ffmpeg | branch: master | Clément Bœsch <u...@pkh.me> | Mon Mar 20 08:30:42 2017 +0100| [8200b16a9c77e66759a5b992bd1ae93c984702c7] | committer: Clément Bœsch
Merge commit 'd7bc52bf456deba0f32d9fe5c288ec441f1ebef5' * commit 'd7bc52bf456deba0f32d9fe5c288ec441f1ebef5': imgutils: add a function for copying image data from GPU mapped memory Merged-by: Clément Bœsch <u...@pkh.me> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8200b16a9c77e66759a5b992bd1ae93c984702c7 --- doc/APIchanges | 4 +++ libavutil/imgutils.c | 75 +++++++++++++++++++++++++++++++++++-------- libavutil/imgutils.h | 18 +++++++++++ libavutil/imgutils_internal.h | 30 +++++++++++++++++ libavutil/version.h | 2 +- libavutil/x86/Makefile | 2 ++ libavutil/x86/imgutils.asm | 53 ++++++++++++++++++++++++++++++ libavutil/x86/imgutils_init.c | 49 ++++++++++++++++++++++++++++ 8 files changed, 219 insertions(+), 14 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index 6e18767..436fb71 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,10 @@ libavutil: 2015-08-28 API changes, most recent first: +2017-03-20 - xxxxxxx - lavu 55.50.100 / 55.21.0 - imgutils.h + Add av_image_copy_uc_from(), a version of av_image_copy() for copying + from GPU mapped memory. + 2017-03-20 - 9c2436e - lavu 55.49.100 - pixdesc.h Add AV_PIX_FMT_FLAG_BAYER pixel format flag. 
diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c index cc410ab..f8f2244 100644 --- a/libavutil/imgutils.c +++ b/libavutil/imgutils.c @@ -24,6 +24,7 @@ #include "avassert.h" #include "common.h" #include "imgutils.h" +#include "imgutils_internal.h" #include "internal.h" #include "intreadwrite.h" #include "log.h" @@ -303,9 +304,9 @@ int av_image_check_sar(unsigned int w, unsigned int h, AVRational sar) return AVERROR(EINVAL); } -void av_image_copy_plane(uint8_t *dst, int dst_linesize, - const uint8_t *src, int src_linesize, - int bytewidth, int height) +static void image_copy_plane(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) { if (!dst || !src) return; @@ -318,9 +319,33 @@ void av_image_copy_plane(uint8_t *dst, int dst_linesize, } } -void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], - const uint8_t *src_data[4], const int src_linesizes[4], - enum AVPixelFormat pix_fmt, int width, int height) +static void image_copy_plane_uc_from(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) +{ + int ret = -1; + +#if ARCH_X86 + ret = ff_image_copy_plane_uc_from_x86(dst, dst_linesize, src, src_linesize, + bytewidth, height); +#endif + + if (ret < 0) + image_copy_plane(dst, dst_linesize, src, src_linesize, bytewidth, height); +} + +void av_image_copy_plane(uint8_t *dst, int dst_linesize, + const uint8_t *src, int src_linesize, + int bytewidth, int height) +{ + image_copy_plane(dst, dst_linesize, src, src_linesize, bytewidth, height); +} + +static void image_copy(uint8_t *dst_data[4], const ptrdiff_t dst_linesizes[4], + const uint8_t *src_data[4], const ptrdiff_t src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height, + void (*copy_plane)(uint8_t *, ptrdiff_t, const uint8_t *, + ptrdiff_t, ptrdiff_t, int)) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); @@ -329,9 +354,9 @@ void 
av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], if (desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) { - av_image_copy_plane(dst_data[0], dst_linesizes[0], - src_data[0], src_linesizes[0], - width, height); + copy_plane(dst_data[0], dst_linesizes[0], + src_data[0], src_linesizes[0], + width, height); /* copy the palette */ memcpy(dst_data[1], src_data[1], 4*256); } else { @@ -342,7 +367,7 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], for (i = 0; i < planes_nb; i++) { int h = height; - int bwidth = av_image_get_linesize(pix_fmt, width, i); + ptrdiff_t bwidth = av_image_get_linesize(pix_fmt, width, i); if (bwidth < 0) { av_log(NULL, AV_LOG_ERROR, "av_image_get_linesize failed\n"); return; @@ -350,13 +375,37 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], if (i == 1 || i == 2) { h = AV_CEIL_RSHIFT(height, desc->log2_chroma_h); } - av_image_copy_plane(dst_data[i], dst_linesizes[i], - src_data[i], src_linesizes[i], - bwidth, h); + copy_plane(dst_data[i], dst_linesizes[i], + src_data[i], src_linesizes[i], + bwidth, h); } } } +void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], + const uint8_t *src_data[4], const int src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height) +{ + ptrdiff_t dst_linesizes1[4], src_linesizes1[4]; + int i; + + for (i = 0; i < 4; i++) { + dst_linesizes1[i] = dst_linesizes[i]; + src_linesizes1[i] = src_linesizes[i]; + } + + image_copy(dst_data, dst_linesizes1, src_data, src_linesizes1, pix_fmt, + width, height, image_copy_plane); +} + +void av_image_copy_uc_from(uint8_t *dst_data[4], const ptrdiff_t dst_linesizes[4], + const uint8_t *src_data[4], const ptrdiff_t src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height) +{ + image_copy(dst_data, dst_linesizes, src_data, src_linesizes, pix_fmt, + width, height, image_copy_plane_uc_from); +} + int av_image_fill_arrays(uint8_t *dst_data[4], int dst_linesize[4], const uint8_t *src, enum 
AVPixelFormat pix_fmt, int width, int height, int align) diff --git a/libavutil/imgutils.h b/libavutil/imgutils.h index 40aee8b..a4a5efc 100644 --- a/libavutil/imgutils.h +++ b/libavutil/imgutils.h @@ -121,6 +121,24 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], enum AVPixelFormat pix_fmt, int width, int height); /** + * Copy image data located in uncacheable (e.g. GPU mapped) memory. Where + * available, this function will use special functionality for reading from such + * memory, which may result in greatly improved performance compared to plain + * av_image_copy(). + * + * The data pointers and the linesizes must be aligned to the maximum required + * by the CPU architecture. + * + * @note The linesize parameters have the type ptrdiff_t here, while they are + * int for av_image_copy(). + * @note On x86, the linesizes currently need to be aligned to the cacheline + * size (i.e. 64) to get improved performance. + */ +void av_image_copy_uc_from(uint8_t *dst_data[4], const ptrdiff_t dst_linesizes[4], + const uint8_t *src_data[4], const ptrdiff_t src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height); + +/** * Setup the data pointers and linesizes based on the specified image * parameters and the provided array. * diff --git a/libavutil/imgutils_internal.h b/libavutil/imgutils_internal.h new file mode 100644 index 0000000..d515858 --- /dev/null +++ b/libavutil/imgutils_internal.h @@ -0,0 +1,30 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_IMGUTILS_INTERNAL_H +#define AVUTIL_IMGUTILS_INTERNAL_H + +#include <stddef.h> +#include <stdint.h> + +int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height); + + +#endif /* AVUTIL_IMGUTILS_INTERNAL_H */ diff --git a/libavutil/version.h b/libavutil/version.h index 25925b9..d50f92c 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 55 -#define LIBAVUTIL_VERSION_MINOR 49 +#define LIBAVUTIL_VERSION_MINOR 50 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile index 94d8832..2b0ba16 100644 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@ -1,6 +1,7 @@ OBJS += x86/cpu.o \ x86/fixed_dsp_init.o \ x86/float_dsp_init.o \ + x86/imgutils_init.o \ x86/lls_init.o \ OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \ @@ -11,6 +12,7 @@ YASM-OBJS += x86/cpuid.o \ $(EMMS_OBJS__yes_) \ x86/fixed_dsp.o \ x86/float_dsp.o \ + x86/imgutils.o \ x86/lls.o \ YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \ diff --git a/libavutil/x86/imgutils.asm b/libavutil/x86/imgutils.asm new file mode 100644 index 0000000..f5ebc0f --- /dev/null +++ b/libavutil/x86/imgutils.asm @@ -0,0 +1,53 @@ +;***************************************************************************** +;* Copyright 2016 Anton Khirnov +;* +;* This file is part of FFmpeg. 
+;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +INIT_XMM sse4 +cglobal image_copy_plane_uc_from, 6, 7, 4, dst, dst_linesize, src, src_linesize, bw, height, rowpos + add dstq, bwq + add srcq, bwq + neg bwq + +.row_start + mov rowposq, bwq + +.loop + movntdqa m0, [srcq + rowposq + 0 * mmsize] + movntdqa m1, [srcq + rowposq + 1 * mmsize] + movntdqa m2, [srcq + rowposq + 2 * mmsize] + movntdqa m3, [srcq + rowposq + 3 * mmsize] + + mova [dstq + rowposq + 0 * mmsize], m0 + mova [dstq + rowposq + 1 * mmsize], m1 + mova [dstq + rowposq + 2 * mmsize], m2 + mova [dstq + rowposq + 3 * mmsize], m3 + + add rowposq, 4 * mmsize + jnz .loop + + add srcq, src_linesizeq + add dstq, dst_linesizeq + dec heightd + jnz .row_start + + RET diff --git a/libavutil/x86/imgutils_init.c b/libavutil/x86/imgutils_init.c new file mode 100644 index 0000000..4ea3982 --- /dev/null +++ b/libavutil/x86/imgutils_init.c @@ -0,0 +1,49 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stddef.h> +#include <stdint.h> + +#include "libavutil/cpu.h" +#include "libavutil/error.h" +#include "libavutil/imgutils.h" +#include "libavutil/imgutils_internal.h" +#include "libavutil/internal.h" + +#include "cpu.h" + +void ff_image_copy_plane_uc_from_sse4(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height); + +int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) +{ + int cpu_flags = av_get_cpu_flags(); + ptrdiff_t bw_aligned = FFALIGN(bytewidth, 64); + + if (EXTERNAL_SSE4(cpu_flags) && + bw_aligned <= dst_linesize && bw_aligned <= src_linesize) + ff_image_copy_plane_uc_from_sse4(dst, dst_linesize, src, src_linesize, + bw_aligned, height); + else + return AVERROR(ENOSYS); + + return 0; +} ====================================================================== diff --cc doc/APIchanges index 6e18767,a4e418a..436fb71 --- a/doc/APIchanges +++ b/doc/APIchanges @@@ -15,43 -13,11 +15,47 @@@ libavutil: 2015-08-2 API changes, most recent first: -2016-08-24 - xxxxxxx - lavu 55.21.0 - imgutils.h ++2017-03-20 - xxxxxxx - lavu 55.50.100 / 55.21.0 - imgutils.h + Add 
av_image_copy_uc_from(), a version of av_image_copy() for copying + from GPU mapped memory. + -2016-xx-xx - xxxxxxx - lavc 59.26.0 - vaapi.h +2017-03-20 - 9c2436e - lavu 55.49.100 - pixdesc.h + Add AV_PIX_FMT_FLAG_BAYER pixel format flag. + +2017-03-18 - xxxxxxx - lavfi 6.77.100 - avfilter.h + Deprecate AVFilterGraph.resample_lavr_opts + It's never been used by avfilter nor passed to anything. + +2017-02-10 - xxxxxxx - lavu 55.48.100 / 55.33.0 - spherical.h + Add AV_SPHERICAL_EQUIRECTANGULAR_TILE, av_spherical_tile_bounds(), + and projection-specific properties (bound_left, bound_top, bound_right, + bound_bottom, padding) to AVSphericalMapping. + +2017-03-02 - xxxxxxx - lavc 57.81.104 - videotoolbox.h + AVVideotoolboxContext.cv_pix_fmt_type can now be set to 0 to output the + native decoder format. (The default value is not changed.) + +2017-03-02 - xxxxxxx - lavu 55.47.101, lavc 57.81.102, lavf 57.66.103 + Remove requirement to use AVOption or accessors to access certain fields + in AVFrame, AVCodecContext, and AVFormatContext that were previously + documented as "no direct access" allowed. + +2017-02-13 - xxxxxxx - lavc 57.80.100 - avcodec.h + Add AVCodecContext.hw_device_ctx. + +2017-02-11 - xxxxxxx - lavu 55.47.100 - frame.h + Add AVFrame.opaque_ref. + +2017-01-31 - xxxxxxx - lavu 55.46.100 / 55.20.0 - cpu.h + Add AV_CPU_FLAG_SSSE3SLOW. + +2017-01-24 - xxxxxxx - lavu 55.45.100 - channel_layout.h + Add av_get_extended_channel_layout() + +2017-01-22 - xxxxxxx - lavu 55.44.100 - lfg.h + Add av_lfg_init_from_data(). + +2017-01-xx - xxxxxxx - lavc 57.74.100 - vaapi.h Deprecate struct vaapi_context and the vaapi.h installed header. Callers should set AVCodecContext.hw_frames_ctx instead. 
diff --cc libavutil/imgutils.c index cc410ab,20d06ec..f8f2244 --- a/libavutil/imgutils.c +++ b/libavutil/imgutils.c @@@ -21,9 -21,9 +21,10 @@@ * misc image utilities */ +#include "avassert.h" #include "common.h" #include "imgutils.h" + #include "imgutils_internal.h" #include "internal.h" #include "intreadwrite.h" #include "log.h" @@@ -342,11 -314,7 +367,11 @@@ static void image_copy(uint8_t *dst_dat for (i = 0; i < planes_nb; i++) { int h = height; - int bwidth = av_image_get_linesize(pix_fmt, width, i); + ptrdiff_t bwidth = av_image_get_linesize(pix_fmt, width, i); + if (bwidth < 0) { + av_log(NULL, AV_LOG_ERROR, "av_image_get_linesize failed\n"); + return; + } if (i == 1 || i == 2) { h = AV_CEIL_RSHIFT(height, desc->log2_chroma_h); } diff --cc libavutil/imgutils_internal.h index 0000000,7a932a5..d515858 mode 000000,100644..100644 --- a/libavutil/imgutils_internal.h +++ b/libavutil/imgutils_internal.h @@@ -1,0 -1,30 +1,30 @@@ + /* - * This file is part of Libav. ++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + #ifndef AVUTIL_IMGUTILS_INTERNAL_H + #define AVUTIL_IMGUTILS_INTERNAL_H + + #include <stddef.h> + #include <stdint.h> + + int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height); + + + #endif /* AVUTIL_IMGUTILS_INTERNAL_H */ diff --cc libavutil/version.h index 25925b9,f63dfa5..d50f92c --- a/libavutil/version.h +++ b/libavutil/version.h @@@ -78,9 -53,9 +78,9 @@@ * @{ */ -#define LIBAVUTIL_VERSION_MAJOR 55 -#define LIBAVUTIL_VERSION_MINOR 21 -#define LIBAVUTIL_VERSION_MICRO 0 +#define LIBAVUTIL_VERSION_MAJOR 55 - #define LIBAVUTIL_VERSION_MINOR 49 ++#define LIBAVUTIL_VERSION_MINOR 50 +#define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ diff --cc libavutil/x86/Makefile index 94d8832,ffee43b..2b0ba16 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@@ -1,16 -1,10 +1,18 @@@ OBJS += x86/cpu.o \ + x86/fixed_dsp_init.o \ x86/float_dsp_init.o \ + x86/imgutils_init.o \ x86/lls_init.o \ +OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \ + +EMMS_OBJS_$(HAVE_MMX_INLINE)_$(HAVE_MMX_EXTERNAL)_$(HAVE_MM_EMPTY) = x86/emms.o + YASM-OBJS += x86/cpuid.o \ - x86/emms.o \ + $(EMMS_OBJS__yes_) \ + x86/fixed_dsp.o \ x86/float_dsp.o \ + x86/imgutils.o \ x86/lls.o \ + +YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \ diff --cc libavutil/x86/imgutils.asm index 0000000,9c1e940..f5ebc0f mode 000000,100644..100644 --- a/libavutil/x86/imgutils.asm +++ b/libavutil/x86/imgutils.asm @@@ -1,0 -1,53 +1,53 @@@ + ;***************************************************************************** + ;* Copyright 2016 Anton 
Khirnov + ;* -;* This file is part of Libav. ++;* This file is part of FFmpeg. + ;* -;* Libav is free software; you can redistribute it and/or ++;* FFmpeg is free software; you can redistribute it and/or + ;* modify it under the terms of the GNU Lesser General Public + ;* License as published by the Free Software Foundation; either + ;* version 2.1 of the License, or (at your option) any later version. + ;* -;* Libav is distributed in the hope that it will be useful, ++;* FFmpeg is distributed in the hope that it will be useful, + ;* but WITHOUT ANY WARRANTY; without even the implied warranty of + ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + ;* Lesser General Public License for more details. + ;* + ;* You should have received a copy of the GNU Lesser General Public -;* License along with Libav; if not, write to the Free Software ++;* License along with FFmpeg; if not, write to the Free Software + ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ;****************************************************************************** + + %include "libavutil/x86/x86util.asm" + + SECTION .text + + INIT_XMM sse4 + cglobal image_copy_plane_uc_from, 6, 7, 4, dst, dst_linesize, src, src_linesize, bw, height, rowpos + add dstq, bwq + add srcq, bwq + neg bwq + + .row_start + mov rowposq, bwq + + .loop + movntdqa m0, [srcq + rowposq + 0 * mmsize] + movntdqa m1, [srcq + rowposq + 1 * mmsize] + movntdqa m2, [srcq + rowposq + 2 * mmsize] + movntdqa m3, [srcq + rowposq + 3 * mmsize] + + mova [dstq + rowposq + 0 * mmsize], m0 + mova [dstq + rowposq + 1 * mmsize], m1 + mova [dstq + rowposq + 2 * mmsize], m2 + mova [dstq + rowposq + 3 * mmsize], m3 + + add rowposq, 4 * mmsize + jnz .loop + + add srcq, src_linesizeq + add dstq, dst_linesizeq + dec heightd + jnz .row_start + + RET diff --cc libavutil/x86/imgutils_init.c index 0000000,20ee9ab..4ea3982 mode 000000,100644..100644 --- a/libavutil/x86/imgutils_init.c +++ 
b/libavutil/x86/imgutils_init.c @@@ -1,0 -1,49 +1,49 @@@ + /* - * This file is part of Libav. ++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + #include <stddef.h> + #include <stdint.h> + + #include "libavutil/cpu.h" + #include "libavutil/error.h" + #include "libavutil/imgutils.h" + #include "libavutil/imgutils_internal.h" + #include "libavutil/internal.h" + + #include "cpu.h" + + void ff_image_copy_plane_uc_from_sse4(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height); + + int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) + { + int cpu_flags = av_get_cpu_flags(); + ptrdiff_t bw_aligned = FFALIGN(bytewidth, 64); + + if (EXTERNAL_SSE4(cpu_flags) && + bw_aligned <= dst_linesize && bw_aligned <= src_linesize) + ff_image_copy_plane_uc_from_sse4(dst, dst_linesize, src, src_linesize, + bw_aligned, height); + else + return AVERROR(ENOSYS); + + return 0; + } 
_______________________________________________ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog