[FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes

2015-10-02 Thread Paul B Mahol
Signed-off-by: Paul B Mahol 
---
 libavfilter/blend.h |  70 ++
 libavfilter/vf_blend.c  | 100 +--
 libavfilter/x86/Makefile|   4 +
 libavfilter/x86/vf_blend.asm| 278 
 libavfilter/x86/vf_blend_init.c |  97 ++
 5 files changed, 478 insertions(+), 71 deletions(-)
 create mode 100644 libavfilter/blend.h
 create mode 100644 libavfilter/x86/vf_blend.asm
 create mode 100644 libavfilter/x86/vf_blend_init.c

diff --git a/libavfilter/blend.h b/libavfilter/blend.h
new file mode 100644
index 000..c22ecd2
--- /dev/null
+++ b/libavfilter/blend.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2013 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/eval.h"
+#include "avfilter.h"
+
+enum BlendMode {
+BLEND_UNSET = -1,
+BLEND_NORMAL,
+BLEND_ADDITION,
+BLEND_AND,
+BLEND_AVERAGE,
+BLEND_BURN,
+BLEND_DARKEN,
+BLEND_DIFFERENCE,
+BLEND_DIFFERENCE128,
+BLEND_DIVIDE,
+BLEND_DODGE,
+BLEND_EXCLUSION,
+BLEND_HARDLIGHT,
+BLEND_LIGHTEN,
+BLEND_MULTIPLY,
+BLEND_NEGATION,
+BLEND_OR,
+BLEND_OVERLAY,
+BLEND_PHOENIX,
+BLEND_PINLIGHT,
+BLEND_REFLECT,
+BLEND_SCREEN,
+BLEND_SOFTLIGHT,
+BLEND_SUBTRACT,
+BLEND_VIVIDLIGHT,
+BLEND_XOR,
+BLEND_HARDMIX,
+BLEND_LINEARLIGHT,
+BLEND_GLOW,
+BLEND_ADDITION128,
+BLEND_NB
+};
+
+typedef struct FilterParams {
+enum BlendMode mode;
+double opacity;
+AVExpr *e;
+char *expr_str;
+void (*blend)(const uint8_t *top, ptrdiff_t top_linesize,
+  const uint8_t *bottom, ptrdiff_t bottom_linesize,
+  uint8_t *dst, ptrdiff_t dst_linesize,
+  int width, int start, int end,
+  struct FilterParams *param, double *values);
+} FilterParams;
+
+void ff_blend_init_x86(FilterParams *param, int is_16bit);
diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c
index f6a649b..b19eb6b 100644
--- a/libavfilter/vf_blend.c
+++ b/libavfilter/vf_blend.c
@@ -28,69 +28,12 @@
 #include "internal.h"
 #include "dualinput.h"
 #include "video.h"
+#include "blend.h"
 
 #define TOP    0
 #define BOTTOM 1
 
-enum BlendMode {
-BLEND_UNSET = -1,
-BLEND_NORMAL,
-BLEND_ADDITION,
-BLEND_AND,
-BLEND_AVERAGE,
-BLEND_BURN,
-BLEND_DARKEN,
-BLEND_DIFFERENCE,
-BLEND_DIFFERENCE128,
-BLEND_DIVIDE,
-BLEND_DODGE,
-BLEND_EXCLUSION,
-BLEND_HARDLIGHT,
-BLEND_LIGHTEN,
-BLEND_MULTIPLY,
-BLEND_NEGATION,
-BLEND_OR,
-BLEND_OVERLAY,
-BLEND_PHOENIX,
-BLEND_PINLIGHT,
-BLEND_REFLECT,
-BLEND_SCREEN,
-BLEND_SOFTLIGHT,
-BLEND_SUBTRACT,
-BLEND_VIVIDLIGHT,
-BLEND_XOR,
-BLEND_HARDMIX,
-BLEND_LINEARLIGHT,
-BLEND_GLOW,
-BLEND_ADDITION128,
-BLEND_NB
-};
-
-static const char *const var_names[] = {   "X",   "Y",   "W",   "H",   "SW",   
"SH",   "T",   "N",   "A",   "B",   "TOP",   "BOTTOM",NULL };
-enum   { VAR_X, VAR_Y, VAR_W, VAR_H, VAR_SW, 
VAR_SH, VAR_T, VAR_N, VAR_A, VAR_B, VAR_TOP, VAR_BOTTOM, VAR_VARS_NB };
-
-typedef struct FilterParams {
-enum BlendMode mode;
-double opacity;
-AVExpr *e;
-char *expr_str;
-void (*blend)(const uint8_t *top, int top_linesize,
-  const uint8_t *bottom, int bottom_linesize,
-  uint8_t *dst, int dst_linesize,
-  int width, int start, int end,
-  struct FilterParams *param, double *values);
-} FilterParams;
-
-typedef struct ThreadData {
-const AVFrame *top, *bottom;
-AVFrame *dst;
-AVFilterLink *inlink;
-int plane;
-int w, h;
-FilterParams *param;
-} ThreadData;
-
-typedef struct {
+typedef struct BlendContext {
 const AVClass *class;
 FFDualInputContext dinput;
 int hsub, vsub; ///< chroma subsampling values
@@ -104,6 +47,18 @@ typedef struct {
 AVFrame *prev_frame;/* only used with tblend */
 } BlendContext;
 
+static const char *const var_names[] = {   "X",   "Y",   "W",   "H",   "SW",   
"SH",   "T",   "N",   "A",   "B",   "TOP",   "BOTTOM",NULL };
+enum   

Re: [FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes

2015-10-02 Thread Henrik Gramner
On Fri, Oct 2, 2015 at 6:57 PM, Paul B Mahol  wrote:
> +INIT_XMM sse2
> +cglobal blend_xor, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, 
> dst, dst_linesize, width, start, end
[...]
> +cglobal blend_or, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, 
> dst, dst_linesize, width, start, end
[...]
> +cglobal blend_and, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, 
> dst, dst_linesize, width, start, end

You could do those using floating point operations (xorps, orps,
andps), then you only need SSE instead of SSE2 (and AVX instead of
AVX2 if you want to make versions using ymm registers).

> +cglobal blend_addition, 9, 10, 3, 0, top, top_linesize, bottom, 
> bottom_linesize, dst, dst_linesize, width, start, end
[...]
> +punpcklbw   m0, m2
> +punpcklbw   m1, m2
> +paddw   m0, m1
> +packuswb    m0, m0
> +movh        [dstq + x], m0
> +add   r10q, mmsize / 2

paddusb

> +cglobal blend_subtract, 9, 10, 3, 0, top, top_linesize, bottom, 
> bottom_linesize, dst, dst_linesize, width, start, end
[...]
> +punpcklbw   m0, m2
> +punpcklbw   m1, m2
> +psubw   m0, m1
> +packuswb    m0, m0

psubusb

> +cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, 
> bottom_linesize, dst, dst_linesize, width, start, end
[...]
> +movh    m0, [topq + x]
> +movh    m1, [bottomq + x]
> +pminub  m0, m1
> +movh    [dstq + x], m0
[...]
> +cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, 
> bottom_linesize, dst, dst_linesize, width, start, end
[...]
> +movh    m0, [topq + x]
> +movh    m1, [bottomq + x]
> +pmaxub  m0, m1
> +movh    [dstq + x], m0

You're only utilizing the lower half of the registers here.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes

2015-10-02 Thread Paul B Mahol
Signed-off-by: Paul B Mahol 
---
 libavfilter/blend.h |  70 ++
 libavfilter/vf_blend.c  | 100 +--
 libavfilter/x86/Makefile|   4 +
 libavfilter/x86/vf_blend.asm| 278 
 libavfilter/x86/vf_blend_init.c |  97 ++
 5 files changed, 478 insertions(+), 71 deletions(-)
 create mode 100644 libavfilter/blend.h
 create mode 100644 libavfilter/x86/vf_blend.asm
 create mode 100644 libavfilter/x86/vf_blend_init.c

diff --git a/libavfilter/blend.h b/libavfilter/blend.h
new file mode 100644
index 000..c22ecd2
--- /dev/null
+++ b/libavfilter/blend.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2013 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/eval.h"
+#include "avfilter.h"
+
+enum BlendMode {
+BLEND_UNSET = -1,
+BLEND_NORMAL,
+BLEND_ADDITION,
+BLEND_AND,
+BLEND_AVERAGE,
+BLEND_BURN,
+BLEND_DARKEN,
+BLEND_DIFFERENCE,
+BLEND_DIFFERENCE128,
+BLEND_DIVIDE,
+BLEND_DODGE,
+BLEND_EXCLUSION,
+BLEND_HARDLIGHT,
+BLEND_LIGHTEN,
+BLEND_MULTIPLY,
+BLEND_NEGATION,
+BLEND_OR,
+BLEND_OVERLAY,
+BLEND_PHOENIX,
+BLEND_PINLIGHT,
+BLEND_REFLECT,
+BLEND_SCREEN,
+BLEND_SOFTLIGHT,
+BLEND_SUBTRACT,
+BLEND_VIVIDLIGHT,
+BLEND_XOR,
+BLEND_HARDMIX,
+BLEND_LINEARLIGHT,
+BLEND_GLOW,
+BLEND_ADDITION128,
+BLEND_NB
+};
+
+typedef struct FilterParams {
+enum BlendMode mode;
+double opacity;
+AVExpr *e;
+char *expr_str;
+void (*blend)(const uint8_t *top, ptrdiff_t top_linesize,
+  const uint8_t *bottom, ptrdiff_t bottom_linesize,
+  uint8_t *dst, ptrdiff_t dst_linesize,
+  int width, int start, int end,
+  struct FilterParams *param, double *values);
+} FilterParams;
+
+void ff_blend_init_x86(FilterParams *param, int is_16bit);
diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c
index f6a649b..b19eb6b 100644
--- a/libavfilter/vf_blend.c
+++ b/libavfilter/vf_blend.c
@@ -28,69 +28,12 @@
 #include "internal.h"
 #include "dualinput.h"
 #include "video.h"
+#include "blend.h"
 
 #define TOP    0
 #define BOTTOM 1
 
-enum BlendMode {
-BLEND_UNSET = -1,
-BLEND_NORMAL,
-BLEND_ADDITION,
-BLEND_AND,
-BLEND_AVERAGE,
-BLEND_BURN,
-BLEND_DARKEN,
-BLEND_DIFFERENCE,
-BLEND_DIFFERENCE128,
-BLEND_DIVIDE,
-BLEND_DODGE,
-BLEND_EXCLUSION,
-BLEND_HARDLIGHT,
-BLEND_LIGHTEN,
-BLEND_MULTIPLY,
-BLEND_NEGATION,
-BLEND_OR,
-BLEND_OVERLAY,
-BLEND_PHOENIX,
-BLEND_PINLIGHT,
-BLEND_REFLECT,
-BLEND_SCREEN,
-BLEND_SOFTLIGHT,
-BLEND_SUBTRACT,
-BLEND_VIVIDLIGHT,
-BLEND_XOR,
-BLEND_HARDMIX,
-BLEND_LINEARLIGHT,
-BLEND_GLOW,
-BLEND_ADDITION128,
-BLEND_NB
-};
-
-static const char *const var_names[] = {   "X",   "Y",   "W",   "H",   "SW",   
"SH",   "T",   "N",   "A",   "B",   "TOP",   "BOTTOM",NULL };
-enum   { VAR_X, VAR_Y, VAR_W, VAR_H, VAR_SW, 
VAR_SH, VAR_T, VAR_N, VAR_A, VAR_B, VAR_TOP, VAR_BOTTOM, VAR_VARS_NB };
-
-typedef struct FilterParams {
-enum BlendMode mode;
-double opacity;
-AVExpr *e;
-char *expr_str;
-void (*blend)(const uint8_t *top, int top_linesize,
-  const uint8_t *bottom, int bottom_linesize,
-  uint8_t *dst, int dst_linesize,
-  int width, int start, int end,
-  struct FilterParams *param, double *values);
-} FilterParams;
-
-typedef struct ThreadData {
-const AVFrame *top, *bottom;
-AVFrame *dst;
-AVFilterLink *inlink;
-int plane;
-int w, h;
-FilterParams *param;
-} ThreadData;
-
-typedef struct {
+typedef struct BlendContext {
 const AVClass *class;
 FFDualInputContext dinput;
 int hsub, vsub; ///< chroma subsampling values
@@ -104,6 +47,18 @@ typedef struct {
 AVFrame *prev_frame;/* only used with tblend */
 } BlendContext;
 
+static const char *const var_names[] = {   "X",   "Y",   "W",   "H",   "SW",   
"SH",   "T",   "N",   "A",   "B",   "TOP",   "BOTTOM",NULL };
+enum   

Re: [FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes

2015-10-02 Thread Paul B Mahol
On 10/2/15, Paul B Mahol  wrote:
> Signed-off-by: Paul B Mahol 
> ---
>  libavfilter/blend.h |  70 ++
>  libavfilter/vf_blend.c  | 100 +--
>  libavfilter/x86/Makefile|   4 +
>  libavfilter/x86/vf_blend.asm| 278
> 
>  libavfilter/x86/vf_blend_init.c |  97 ++
>  5 files changed, 478 insertions(+), 71 deletions(-)
>  create mode 100644 libavfilter/blend.h
>  create mode 100644 libavfilter/x86/vf_blend.asm
>  create mode 100644 libavfilter/x86/vf_blend_init.c
>

[..]

> +
> +cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom,
> bottom_linesize, dst, dst_linesize, width, start, end
> +add  topq, widthq
> +add   bottomq, widthq
> +add  dstq, widthq
> +sub  endq, startq
> +neg   widthq
> +.nextrow:
> +mov   r10q, widthq
> +%define  x  r10q
> +
> +.loop:
> +movh    m0, [topq + x]
> +movh    m1, [bottomq + x]
> +pminub  m0, m1
> +movh    [dstq + x], m0
> +add   r10q, mmsize / 2

Removed division.

> +jl .loop
> +
> +add  topq, top_linesizeq
> +add   bottomq, bottom_linesizeq
> +add  dstq, dst_linesizeq
> +sub  endd, 1
> +jg .nextrow
> +REP_RET
> +
> +cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom,
> bottom_linesize, dst, dst_linesize, width, start, end
> +add  topq, widthq
> +add   bottomq, widthq
> +add  dstq, widthq
> +sub  endq, startq
> +neg   widthq
> +.nextrow:
> +mov   r10q, widthq
> +%define  x  r10q
> +
> +.loop:
> +movh    m0, [topq + x]
> +movh    m1, [bottomq + x]
> +pmaxub  m0, m1
> +movh    [dstq + x], m0
> +add   r10q, mmsize / 2

Removed division.

> +jl .loop
> +
> +add  topq, top_linesizeq
> +add   bottomq, bottom_linesizeq
> +add  dstq, dst_linesizeq
> +sub  endd, 1
> +jg .nextrow
> +REP_RET
> +
> +%endif

[...]
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes

2015-10-02 Thread Paul B Mahol
On 10/2/15, Henrik Gramner  wrote:
> On Fri, Oct 2, 2015 at 6:57 PM, Paul B Mahol  wrote:
>> +INIT_XMM sse2
>> +cglobal blend_xor, 9, 10, 2, 0, top, top_linesize, bottom,
>> bottom_linesize, dst, dst_linesize, width, start, end
> [...]
>> +cglobal blend_or, 9, 10, 2, 0, top, top_linesize, bottom,
>> bottom_linesize, dst, dst_linesize, width, start, end
> [...]
>> +cglobal blend_and, 9, 10, 2, 0, top, top_linesize, bottom,
>> bottom_linesize, dst, dst_linesize, width, start, end
>
> You could do those using floating point operations (xorps, orps,
> andps), then you only need SSE instead of SSE2 (and AVX instead of
> AVX2 if you want to make versions using ymm registers).
>
>> +cglobal blend_addition, 9, 10, 3, 0, top, top_linesize, bottom,
>> bottom_linesize, dst, dst_linesize, width, start, end
> [...]
>> +punpcklbw   m0, m2
>> +punpcklbw   m1, m2
>> +paddw   m0, m1
>> +packuswb    m0, m0
>> +movh        [dstq + x], m0
>> +add   r10q, mmsize / 2
>
> paddusb
>

fixed locally.

>> +cglobal blend_subtract, 9, 10, 3, 0, top, top_linesize, bottom,
>> bottom_linesize, dst, dst_linesize, width, start, end
> [...]
>> +punpcklbw   m0, m2
>> +punpcklbw   m1, m2
>> +psubw   m0, m1
>> +packuswb    m0, m0
>
> psubusb

fixed locally.

>
>> +cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom,
>> bottom_linesize, dst, dst_linesize, width, start, end
> [...]
>> +movh    m0, [topq + x]
>> +movh    m1, [bottomq + x]
>> +pminub  m0, m1
>> +movh    [dstq + x], m0
> [...]
>> +cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom,
>> bottom_linesize, dst, dst_linesize, width, start, end
> [...]
>> +movh    m0, [topq + x]
>> +movh    m1, [bottomq + x]
>> +pmaxub  m0, m1
>> +movh    [dstq + x], m0
>
> You're only utilizing the lower half of the registers here.

fixed locally.

> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel