Re: [FFmpeg-devel] [PATCH 2/2] x86/vf_blend: Add SSE2 optimization for screen

2016-02-10 Thread Paul B Mahol
On 2/9/16, Timothy Gu wrote:
> ---
>  libavfilter/x86/vf_blend.asm    | 29 +++++++++++++++++++++++++++++
>  libavfilter/x86/vf_blend_init.c |  2 ++
>  2 files changed, 31 insertions(+)
>

Nice!

LGTM
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] x86/vf_blend: Add SSE2 optimization for screen

2016-02-09 Thread Timothy Gu
---
 libavfilter/x86/vf_blend.asm    | 29 +++++++++++++++++++++++++++++
 libavfilter/x86/vf_blend_init.c |  2 ++
 2 files changed, 31 insertions(+)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 50b5f8a..a5ea74c 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -111,6 +111,13 @@ BLEND_END
 psrlw   %1, 8; 00xx00xx  a * b / 255
 %endmacro
 
+%macro SCREEN 4   ; a, b, pw_1, pw_255
+    pxor            %1, %4                  ; 00xx00xx  255 - a
+    pxor            %2, %4
+    MULTIPLY        %1, %2, %3
+    pxor            %1, %4                  ; 00xx00xx  255 - x / 255
+%endmacro
+
 BLEND_INIT multiply, 4
 pxor   m2, m2
 mova   m3, [pw_1]
@@ -134,6 +141,28 @@ BLEND_INIT multiply, 4
 jl .loop
 BLEND_END
 
+BLEND_INIT screen, 5
+    pxor       m2, m2
+    mova       m3, [pw_1]
+    mova       m4, [pw_255]
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movh            m0, [topq + xq]         ; 
+        movh            m1, [bottomq + xq]
+        punpcklbw       m0, m2                  ; 00xx00xx
+        punpcklbw       m1, m2
+
+        SCREEN          m0, m1, m3, m4
+
+        packuswb        m0, m0                  ; 
+        movh   [dstq + xq], m0
+        add             xq, mmsize / 2
+
+    jl .loop
+BLEND_END
+
 BLEND_INIT average, 3
 pxor   m2, m2
 .nextrow:
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 8ac526a..a6baf94 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -37,6 +37,7 @@ BLEND_FUNC(and, sse2)
 BLEND_FUNC(darken, sse2)
 BLEND_FUNC(difference128, sse2)
 BLEND_FUNC(multiply, sse2)
+BLEND_FUNC(screen, sse2)
 BLEND_FUNC(hardmix, sse2)
 BLEND_FUNC(lighten, sse2)
 BLEND_FUNC(or, sse2)
@@ -65,6 +66,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
 case BLEND_MULTIPLY: param->blend = ff_blend_multiply_sse2; break;
 case BLEND_OR:   param->blend = ff_blend_or_sse2;   break;
 case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_sse2;  break;
+case BLEND_SCREEN:   param->blend = ff_blend_screen_sse2; break;
 case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
 case BLEND_XOR:  param->blend = ff_blend_xor_sse2;  break;
 case BLEND_DIFFERENCE: param->blend = ff_blend_difference_sse2; break;
-- 
1.9.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel