[FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes
Signed-off-by: Paul B Mahol--- libavfilter/blend.h | 70 ++ libavfilter/vf_blend.c | 100 +-- libavfilter/x86/Makefile| 4 + libavfilter/x86/vf_blend.asm| 278 libavfilter/x86/vf_blend_init.c | 97 ++ 5 files changed, 478 insertions(+), 71 deletions(-) create mode 100644 libavfilter/blend.h create mode 100644 libavfilter/x86/vf_blend.asm create mode 100644 libavfilter/x86/vf_blend_init.c diff --git a/libavfilter/blend.h b/libavfilter/blend.h new file mode 100644 index 000..c22ecd2 --- /dev/null +++ b/libavfilter/blend.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2013 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/eval.h" +#include "avfilter.h" + +enum BlendMode { +BLEND_UNSET = -1, +BLEND_NORMAL, +BLEND_ADDITION, +BLEND_AND, +BLEND_AVERAGE, +BLEND_BURN, +BLEND_DARKEN, +BLEND_DIFFERENCE, +BLEND_DIFFERENCE128, +BLEND_DIVIDE, +BLEND_DODGE, +BLEND_EXCLUSION, +BLEND_HARDLIGHT, +BLEND_LIGHTEN, +BLEND_MULTIPLY, +BLEND_NEGATION, +BLEND_OR, +BLEND_OVERLAY, +BLEND_PHOENIX, +BLEND_PINLIGHT, +BLEND_REFLECT, +BLEND_SCREEN, +BLEND_SOFTLIGHT, +BLEND_SUBTRACT, +BLEND_VIVIDLIGHT, +BLEND_XOR, +BLEND_HARDMIX, +BLEND_LINEARLIGHT, +BLEND_GLOW, +BLEND_ADDITION128, +BLEND_NB +}; + +typedef struct FilterParams { +enum BlendMode mode; +double opacity; +AVExpr *e; +char *expr_str; +void (*blend)(const uint8_t *top, ptrdiff_t top_linesize, + const uint8_t *bottom, ptrdiff_t bottom_linesize, + uint8_t *dst, ptrdiff_t dst_linesize, + int width, int start, int end, + struct FilterParams *param, double *values); +} FilterParams; + +void ff_blend_init_x86(FilterParams *param, int is_16bit); diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c index f6a649b..b19eb6b 100644 --- a/libavfilter/vf_blend.c +++ b/libavfilter/vf_blend.c @@ -28,69 +28,12 @@ #include "internal.h" #include "dualinput.h" #include "video.h" +#include "blend.h" #define TOP0 #define BOTTOM 1 -enum BlendMode { -BLEND_UNSET = -1, -BLEND_NORMAL, -BLEND_ADDITION, -BLEND_AND, -BLEND_AVERAGE, -BLEND_BURN, -BLEND_DARKEN, -BLEND_DIFFERENCE, -BLEND_DIFFERENCE128, -BLEND_DIVIDE, -BLEND_DODGE, -BLEND_EXCLUSION, -BLEND_HARDLIGHT, -BLEND_LIGHTEN, -BLEND_MULTIPLY, -BLEND_NEGATION, -BLEND_OR, -BLEND_OVERLAY, -BLEND_PHOENIX, -BLEND_PINLIGHT, -BLEND_REFLECT, -BLEND_SCREEN, -BLEND_SOFTLIGHT, -BLEND_SUBTRACT, -BLEND_VIVIDLIGHT, -BLEND_XOR, -BLEND_HARDMIX, -BLEND_LINEARLIGHT, -BLEND_GLOW, 
-BLEND_ADDITION128, -BLEND_NB -}; - -static const char *const var_names[] = { "X", "Y", "W", "H", "SW", "SH", "T", "N", "A", "B", "TOP", "BOTTOM",NULL }; -enum { VAR_X, VAR_Y, VAR_W, VAR_H, VAR_SW, VAR_SH, VAR_T, VAR_N, VAR_A, VAR_B, VAR_TOP, VAR_BOTTOM, VAR_VARS_NB }; - -typedef struct FilterParams { -enum BlendMode mode; -double opacity; -AVExpr *e; -char *expr_str; -void (*blend)(const uint8_t *top, int top_linesize, - const uint8_t *bottom, int bottom_linesize, - uint8_t *dst, int dst_linesize, - int width, int start, int end, - struct FilterParams *param, double *values); -} FilterParams; - -typedef struct ThreadData { -const AVFrame *top, *bottom; -AVFrame *dst; -AVFilterLink *inlink; -int plane; -int w, h; -FilterParams *param; -} ThreadData; - -typedef struct { +typedef struct BlendContext { const AVClass *class; FFDualInputContext dinput; int hsub, vsub; ///< chroma subsampling values @@ -104,6 +47,18 @@ typedef struct { AVFrame *prev_frame;/* only used with tblend */ } BlendContext; +static const char *const var_names[] = { "X", "Y", "W", "H", "SW", "SH", "T", "N", "A", "B", "TOP", "BOTTOM",NULL }; +enum
Re: [FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes
On Fri, Oct 2, 2015 at 6:57 PM, Paul B Mahol wrote: > +INIT_XMM sse2 > +cglobal blend_xor, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, > dst, dst_linesize, width, start, end [...] > +cglobal blend_or, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, > dst, dst_linesize, width, start, end [...] > +cglobal blend_and, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, > dst, dst_linesize, width, start, end You could do those using floating point operations (xorps, orps, andps), then you only need SSE instead of SSE2 (and AVX instead of AVX2 if you want to make versions using ymm registers). > +cglobal blend_addition, 9, 10, 3, 0, top, top_linesize, bottom, > bottom_linesize, dst, dst_linesize, width, start, end [...] > +punpcklbw m0, m2 > +punpcklbw m1, m2 > +paddw m0, m1 > +packuswb m0, m0 > +movh [dstq + x], m0 > +add r10q, mmsize / 2 paddusb > +cglobal blend_subtract, 9, 10, 3, 0, top, top_linesize, bottom, > bottom_linesize, dst, dst_linesize, width, start, end [...] > +punpcklbw m0, m2 > +punpcklbw m1, m2 > +psubw m0, m1 > +packuswb m0, m0 psubusb > +cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, > bottom_linesize, dst, dst_linesize, width, start, end [...] > +movh m0, [topq + x] > +movh m1, [bottomq + x] > +pminub m0, m1 > +movh [dstq + x], m0 [...] > +cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, > bottom_linesize, dst, dst_linesize, width, start, end [...] > +movh m0, [topq + x] > +movh m1, [bottomq + x] > +pmaxub m0, m1 > +movh [dstq + x], m0 You're only utilizing the lower half of the registers here. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes
Signed-off-by: Paul B Mahol--- libavfilter/blend.h | 70 ++ libavfilter/vf_blend.c | 100 +-- libavfilter/x86/Makefile| 4 + libavfilter/x86/vf_blend.asm| 278 libavfilter/x86/vf_blend_init.c | 97 ++ 5 files changed, 478 insertions(+), 71 deletions(-) create mode 100644 libavfilter/blend.h create mode 100644 libavfilter/x86/vf_blend.asm create mode 100644 libavfilter/x86/vf_blend_init.c diff --git a/libavfilter/blend.h b/libavfilter/blend.h new file mode 100644 index 000..c22ecd2 --- /dev/null +++ b/libavfilter/blend.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2013 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/eval.h" +#include "avfilter.h" + +enum BlendMode { +BLEND_UNSET = -1, +BLEND_NORMAL, +BLEND_ADDITION, +BLEND_AND, +BLEND_AVERAGE, +BLEND_BURN, +BLEND_DARKEN, +BLEND_DIFFERENCE, +BLEND_DIFFERENCE128, +BLEND_DIVIDE, +BLEND_DODGE, +BLEND_EXCLUSION, +BLEND_HARDLIGHT, +BLEND_LIGHTEN, +BLEND_MULTIPLY, +BLEND_NEGATION, +BLEND_OR, +BLEND_OVERLAY, +BLEND_PHOENIX, +BLEND_PINLIGHT, +BLEND_REFLECT, +BLEND_SCREEN, +BLEND_SOFTLIGHT, +BLEND_SUBTRACT, +BLEND_VIVIDLIGHT, +BLEND_XOR, +BLEND_HARDMIX, +BLEND_LINEARLIGHT, +BLEND_GLOW, +BLEND_ADDITION128, +BLEND_NB +}; + +typedef struct FilterParams { +enum BlendMode mode; +double opacity; +AVExpr *e; +char *expr_str; +void (*blend)(const uint8_t *top, ptrdiff_t top_linesize, + const uint8_t *bottom, ptrdiff_t bottom_linesize, + uint8_t *dst, ptrdiff_t dst_linesize, + int width, int start, int end, + struct FilterParams *param, double *values); +} FilterParams; + +void ff_blend_init_x86(FilterParams *param, int is_16bit); diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c index f6a649b..b19eb6b 100644 --- a/libavfilter/vf_blend.c +++ b/libavfilter/vf_blend.c @@ -28,69 +28,12 @@ #include "internal.h" #include "dualinput.h" #include "video.h" +#include "blend.h" #define TOP0 #define BOTTOM 1 -enum BlendMode { -BLEND_UNSET = -1, -BLEND_NORMAL, -BLEND_ADDITION, -BLEND_AND, -BLEND_AVERAGE, -BLEND_BURN, -BLEND_DARKEN, -BLEND_DIFFERENCE, -BLEND_DIFFERENCE128, -BLEND_DIVIDE, -BLEND_DODGE, -BLEND_EXCLUSION, -BLEND_HARDLIGHT, -BLEND_LIGHTEN, -BLEND_MULTIPLY, -BLEND_NEGATION, -BLEND_OR, -BLEND_OVERLAY, -BLEND_PHOENIX, -BLEND_PINLIGHT, -BLEND_REFLECT, -BLEND_SCREEN, -BLEND_SOFTLIGHT, -BLEND_SUBTRACT, -BLEND_VIVIDLIGHT, -BLEND_XOR, -BLEND_HARDMIX, -BLEND_LINEARLIGHT, -BLEND_GLOW, 
-BLEND_ADDITION128, -BLEND_NB -}; - -static const char *const var_names[] = { "X", "Y", "W", "H", "SW", "SH", "T", "N", "A", "B", "TOP", "BOTTOM",NULL }; -enum { VAR_X, VAR_Y, VAR_W, VAR_H, VAR_SW, VAR_SH, VAR_T, VAR_N, VAR_A, VAR_B, VAR_TOP, VAR_BOTTOM, VAR_VARS_NB }; - -typedef struct FilterParams { -enum BlendMode mode; -double opacity; -AVExpr *e; -char *expr_str; -void (*blend)(const uint8_t *top, int top_linesize, - const uint8_t *bottom, int bottom_linesize, - uint8_t *dst, int dst_linesize, - int width, int start, int end, - struct FilterParams *param, double *values); -} FilterParams; - -typedef struct ThreadData { -const AVFrame *top, *bottom; -AVFrame *dst; -AVFilterLink *inlink; -int plane; -int w, h; -FilterParams *param; -} ThreadData; - -typedef struct { +typedef struct BlendContext { const AVClass *class; FFDualInputContext dinput; int hsub, vsub; ///< chroma subsampling values @@ -104,6 +47,18 @@ typedef struct { AVFrame *prev_frame;/* only used with tblend */ } BlendContext; +static const char *const var_names[] = { "X", "Y", "W", "H", "SW", "SH", "T", "N", "A", "B", "TOP", "BOTTOM",NULL }; +enum
Re: [FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes
On 10/2/15, Paul B Mahol wrote: > Signed-off-by: Paul B Mahol > --- > libavfilter/blend.h | 70 ++ > libavfilter/vf_blend.c | 100 +-- > libavfilter/x86/Makefile| 4 + > libavfilter/x86/vf_blend.asm| 278 > > libavfilter/x86/vf_blend_init.c | 97 ++ > 5 files changed, 478 insertions(+), 71 deletions(-) > create mode 100644 libavfilter/blend.h > create mode 100644 libavfilter/x86/vf_blend.asm > create mode 100644 libavfilter/x86/vf_blend_init.c > [..] > + > +cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, > bottom_linesize, dst, dst_linesize, width, start, end > +add topq, widthq > +add bottomq, widthq > +add dstq, widthq > +sub endq, startq > +neg widthq > +.nextrow: > +mov r10q, widthq > +%define x r10q > + > +.loop: > +movh m0, [topq + x] > +movh m1, [bottomq + x] > +pminub m0, m1 > +movh [dstq + x], m0 > +add r10q, mmsize / 2 Removed division. > +jl .loop > + > +add topq, top_linesizeq > +add bottomq, bottom_linesizeq > +add dstq, dst_linesizeq > +sub endd, 1 > +jg .nextrow > +REP_RET > + > +cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, > bottom_linesize, dst, dst_linesize, width, start, end > +add topq, widthq > +add bottomq, widthq > +add dstq, widthq > +sub endq, startq > +neg widthq > +.nextrow: > +mov r10q, widthq > +%define x r10q > + > +.loop: > +movh m0, [topq + x] > +movh m1, [bottomq + x] > +pmaxub m0, m1 > +movh [dstq + x], m0 > +add r10q, mmsize / 2 Removed division. > +jl .loop > + > +add topq, top_linesizeq > +add bottomq, bottom_linesizeq > +add dstq, dst_linesizeq > +sub endd, 1 > +jg .nextrow > +REP_RET > + > +%endif [...] ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes
On 10/2/15, Henrik Gramner wrote: > On Fri, Oct 2, 2015 at 6:57 PM, Paul B Mahol wrote: >> +INIT_XMM sse2 >> +cglobal blend_xor, 9, 10, 2, 0, top, top_linesize, bottom, >> bottom_linesize, dst, dst_linesize, width, start, end > [...] >> +cglobal blend_or, 9, 10, 2, 0, top, top_linesize, bottom, >> bottom_linesize, dst, dst_linesize, width, start, end > [...] >> +cglobal blend_and, 9, 10, 2, 0, top, top_linesize, bottom, >> bottom_linesize, dst, dst_linesize, width, start, end > > You could do those using floating point operations (xorps, orps, > andps), then you only need SSE instead of SSE2 (and AVX instead of > AVX2 if you want to make versions using ymm registers). > >> +cglobal blend_addition, 9, 10, 3, 0, top, top_linesize, bottom, >> bottom_linesize, dst, dst_linesize, width, start, end > [...] >> +punpcklbw m0, m2 >> +punpcklbw m1, m2 >> +paddw m0, m1 >> +packuswb m0, m0 >> +movh [dstq + x], m0 >> +add r10q, mmsize / 2 > > paddusb > fixed locally. >> +cglobal blend_subtract, 9, 10, 3, 0, top, top_linesize, bottom, >> bottom_linesize, dst, dst_linesize, width, start, end > [...] >> +punpcklbw m0, m2 >> +punpcklbw m1, m2 >> +psubw m0, m1 >> +packuswb m0, m0 > > psubusb fixed locally. > >> +cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, >> bottom_linesize, dst, dst_linesize, width, start, end > [...] >> +movh m0, [topq + x] >> +movh m1, [bottomq + x] >> +pminub m0, m1 >> +movh [dstq + x], m0 > [...] >> +cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, >> bottom_linesize, dst, dst_linesize, width, start, end > [...] >> +movh m0, [topq + x] >> +movh m1, [bottomq + x] >> +pmaxub m0, m1 >> +movh [dstq + x], m0 > > You're only utilizing the lower half the registers here. fixed locally. > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel