Re: [FFmpeg-devel] [PATCH 2/2] x86/vf_stereo3d: make ff_anaglyph_sse4 work on x86_32

2015-12-28 Thread Paul B Mahol
On 12/27/15, James Almer  wrote:
> Signed-off-by: James Almer 
> ---
>  libavfilter/x86/vf_stereo3d.asm| 47
> +++---
>  libavfilter/x86/vf_stereo3d_init.c |  2 +-
>  2 files changed, 45 insertions(+), 4 deletions(-)
>
> diff --git a/libavfilter/x86/vf_stereo3d.asm
> b/libavfilter/x86/vf_stereo3d.asm
> index 29a8c56..491579f 100644
> --- a/libavfilter/x86/vf_stereo3d.asm
> +++ b/libavfilter/x86/vf_stereo3d.asm
> @@ -22,8 +22,6 @@
>
>  %include "libavutil/x86/x86util.asm"
>
> -%if ARCH_X86_64
> -
>  SECTION_RODATA
>
>  ; rgbrgbrgbrgb
> @@ -37,10 +35,33 @@ ex_b: db 2,-1,-1,-1,5,-1,-1,-1,8,-1,-1,-1,11,-1,-1,-1
>  SECTION .text
>
>  INIT_XMM sse4
> +%if ARCH_X86_64
>  cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize,
> l_linesize, r_linesize, width, height, o, cnt
>  %define ana_matrix_rq r6q
>  %define ana_matrix_gq r7q
>  %define ana_matrix_bq r8q
> +
> +%else ; ARCH_X86_32
> +%if HAVE_ALIGNED_STACK
> +cglobal anaglyph, 3, 7, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize,
> l_linesize, o, cnt
> +%else
> +cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o,
> cnt
> +%define l_linesizeq r4mp
> +%endif ; HAVE_ALIGNED_STACK
> +%define ana_matrix_rq r3q
> +%define ana_matrix_gq r4q
> +%define ana_matrix_bq r5q
> +%define r_linesizeq r5mp
> +%define widthd  r6mp
> +%define heightd r7mp
> +%define  m8 [rsp+mmsize*12]
> +%define  m9 [rsp+mmsize*13]
> +%define m10 [rsp+mmsize*14]
> +%define m11 [rsp+mmsize*15]
> +%define m12 [rsp+mmsize*16]
> +%define m13 [rsp+mmsize*17]
> +%endif ; ARCH
> +
>  mov ana_matrix_rq, r8m
>  mov ana_matrix_gq, r9m
>  mov ana_matrix_bq, r10m
> @@ -74,6 +95,7 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc,
> dst_linesize, l_linesi
>  mova [rsp+mmsize*10], m4
>  mova [rsp+mmsize*11], m5
>
> +%if ARCH_X86_64
>  movu m11, [ana_matrix_bq+ 0]
>  movq m13, [ana_matrix_bq+16]
>  pshufd m8, m11, q0000
> @@ -84,6 +106,26 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc,
> rsrc, dst_linesize, l_linesi
>  pshufd   m13, m13, q1111
>  mov   widthd, dword widthm
>  mov  heightd, dword heightm
> +%else
> +movu  m3, [ana_matrix_bq+ 0]
> +movq  m5, [ana_matrix_bq+16]
> +pshufd m0, m3, q0000
> +pshufd m1, m3, q1111
> +pshufd m2, m3, q2222
> +pshufd m3, m3, q3333
> +pshufd m4, m5, q0000
> +pshufd m5, m5, q1111
> +mova [rsp+mmsize*12], m0
> +mova [rsp+mmsize*13], m1
> +mova [rsp+mmsize*14], m2
> +mova [rsp+mmsize*15], m3
> +mova [rsp+mmsize*16], m4
> +mova [rsp+mmsize*17], m5
> +mov dst_linesizeq, r3m
> +%if HAVE_ALIGNED_STACK
> +mov  l_linesizeq, r4m
> +%endif
> +%endif ; ARCH
>
>  .nextrow:
>  mov   od, widthd
> @@ -172,4 +214,3 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc,
> rsrc, dst_linesize, l_linesi
>  sub   heightd, 1
>  jg .nextrow
>  REP_RET
> -%endif
> diff --git a/libavfilter/x86/vf_stereo3d_init.c
> b/libavfilter/x86/vf_stereo3d_init.c
> index 77d4f7b..da160a8 100644
> --- a/libavfilter/x86/vf_stereo3d_init.c
> +++ b/libavfilter/x86/vf_stereo3d_init.c
> @@ -31,7 +31,7 @@ void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp)
>  {
>  int cpu_flags = av_get_cpu_flags();
>
> -if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) {
> +if (EXTERNAL_SSE4(cpu_flags)) {
>  dsp->anaglyph = ff_anaglyph_sse4;
>  }
>  }
> --
> 2.6.3
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

both patches ok if fate is not broken by this
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/2] x86/vf_stereo3d: make ff_anaglyph_sse4 work on x86_32

2015-12-28 Thread James Almer
On 12/28/2015 5:15 AM, Paul B Mahol wrote:
> On 12/27/15, James Almer  wrote:
>> Signed-off-by: James Almer 
>> ---
>>  libavfilter/x86/vf_stereo3d.asm| 47
>> +++---
>>  libavfilter/x86/vf_stereo3d_init.c |  2 +-
>>  2 files changed, 45 insertions(+), 4 deletions(-)
>>
>> diff --git a/libavfilter/x86/vf_stereo3d.asm
>> b/libavfilter/x86/vf_stereo3d.asm
>> index 29a8c56..491579f 100644
>> --- a/libavfilter/x86/vf_stereo3d.asm
>> +++ b/libavfilter/x86/vf_stereo3d.asm
>> @@ -22,8 +22,6 @@
>>
>>  %include "libavutil/x86/x86util.asm"
>>
>> -%if ARCH_X86_64
>> -
>>  SECTION_RODATA
>>
>>  ; rgbrgbrgbrgb
>> @@ -37,10 +35,33 @@ ex_b: db 2,-1,-1,-1,5,-1,-1,-1,8,-1,-1,-1,11,-1,-1,-1
>>  SECTION .text
>>
>>  INIT_XMM sse4
>> +%if ARCH_X86_64
>>  cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize,
>> l_linesize, r_linesize, width, height, o, cnt
>>  %define ana_matrix_rq r6q
>>  %define ana_matrix_gq r7q
>>  %define ana_matrix_bq r8q
>> +
>> +%else ; ARCH_X86_32
>> +%if HAVE_ALIGNED_STACK
>> +cglobal anaglyph, 3, 7, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize,
>> l_linesize, o, cnt
>> +%else
>> +cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o,
>> cnt
>> +%define l_linesizeq r4mp
>> +%endif ; HAVE_ALIGNED_STACK
>> +%define ana_matrix_rq r3q
>> +%define ana_matrix_gq r4q
>> +%define ana_matrix_bq r5q
>> +%define r_linesizeq r5mp
>> +%define widthd  r6mp
>> +%define heightd r7mp
>> +%define  m8 [rsp+mmsize*12]
>> +%define  m9 [rsp+mmsize*13]
>> +%define m10 [rsp+mmsize*14]
>> +%define m11 [rsp+mmsize*15]
>> +%define m12 [rsp+mmsize*16]
>> +%define m13 [rsp+mmsize*17]
>> +%endif ; ARCH
>> +
>>  mov ana_matrix_rq, r8m
>>  mov ana_matrix_gq, r9m
>>  mov ana_matrix_bq, r10m
>> @@ -74,6 +95,7 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc,
>> dst_linesize, l_linesi
>>  mova [rsp+mmsize*10], m4
>>  mova [rsp+mmsize*11], m5
>>
>> +%if ARCH_X86_64
>>  movu m11, [ana_matrix_bq+ 0]
>>  movq m13, [ana_matrix_bq+16]
>>  pshufd m8, m11, q0000
>> @@ -84,6 +106,26 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc,
>> rsrc, dst_linesize, l_linesi
>>  pshufd   m13, m13, q1111
>>  mov   widthd, dword widthm
>>  mov  heightd, dword heightm
>> +%else
>> +movu  m3, [ana_matrix_bq+ 0]
>> +movq  m5, [ana_matrix_bq+16]
>> +pshufd m0, m3, q0000
>> +pshufd m1, m3, q1111
>> +pshufd m2, m3, q2222
>> +pshufd m3, m3, q3333
>> +pshufd m4, m5, q0000
>> +pshufd m5, m5, q1111
>> +mova [rsp+mmsize*12], m0
>> +mova [rsp+mmsize*13], m1
>> +mova [rsp+mmsize*14], m2
>> +mova [rsp+mmsize*15], m3
>> +mova [rsp+mmsize*16], m4
>> +mova [rsp+mmsize*17], m5
>> +mov dst_linesizeq, r3m
>> +%if HAVE_ALIGNED_STACK
>> +mov  l_linesizeq, r4m
>> +%endif
>> +%endif ; ARCH
>>
>>  .nextrow:
>>  mov   od, widthd
>> @@ -172,4 +214,3 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc,
>> rsrc, dst_linesize, l_linesi
>>  sub   heightd, 1
>>  jg .nextrow
>>  REP_RET
>> -%endif
>> diff --git a/libavfilter/x86/vf_stereo3d_init.c
>> b/libavfilter/x86/vf_stereo3d_init.c
>> index 77d4f7b..da160a8 100644
>> --- a/libavfilter/x86/vf_stereo3d_init.c
>> +++ b/libavfilter/x86/vf_stereo3d_init.c
>> @@ -31,7 +31,7 @@ void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp)
>>  {
>>  int cpu_flags = av_get_cpu_flags();
>>
>> -if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) {
>> +if (EXTERNAL_SSE4(cpu_flags)) {
>>  dsp->anaglyph = ff_anaglyph_sse4;
>>  }
>>  }
>> --
>> 2.6.3
>>
>> ___
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
> 
> both patches ok if fate is not broken by this

Pushed then, thanks.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] x86/vf_stereo3d: make ff_anaglyph_sse4 work on x86_32

2015-12-27 Thread James Almer
Signed-off-by: James Almer 
---
 libavfilter/x86/vf_stereo3d.asm| 47 +++---
 libavfilter/x86/vf_stereo3d_init.c |  2 +-
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/libavfilter/x86/vf_stereo3d.asm b/libavfilter/x86/vf_stereo3d.asm
index 29a8c56..491579f 100644
--- a/libavfilter/x86/vf_stereo3d.asm
+++ b/libavfilter/x86/vf_stereo3d.asm
@@ -22,8 +22,6 @@
 
 %include "libavutil/x86/x86util.asm"
 
-%if ARCH_X86_64
-
 SECTION_RODATA
 
 ; rgbrgbrgbrgb
@@ -37,10 +35,33 @@ ex_b: db 2,-1,-1,-1,5,-1,-1,-1,8,-1,-1,-1,11,-1,-1,-1
 SECTION .text
 
 INIT_XMM sse4
+%if ARCH_X86_64
 cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesize, r_linesize, width, height, o, cnt
 %define ana_matrix_rq r6q
 %define ana_matrix_gq r7q
 %define ana_matrix_bq r8q
+
+%else ; ARCH_X86_32
+%if HAVE_ALIGNED_STACK
+cglobal anaglyph, 3, 7, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesize, o, cnt
+%else
+cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o, cnt
+%define l_linesizeq r4mp
+%endif ; HAVE_ALIGNED_STACK
+%define ana_matrix_rq r3q
+%define ana_matrix_gq r4q
+%define ana_matrix_bq r5q
+%define r_linesizeq r5mp
+%define widthd  r6mp
+%define heightd r7mp
+%define  m8 [rsp+mmsize*12]
+%define  m9 [rsp+mmsize*13]
+%define m10 [rsp+mmsize*14]
+%define m11 [rsp+mmsize*15]
+%define m12 [rsp+mmsize*16]
+%define m13 [rsp+mmsize*17]
+%endif ; ARCH
+
 mov ana_matrix_rq, r8m
 mov ana_matrix_gq, r9m
 mov ana_matrix_bq, r10m
@@ -74,6 +95,7 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesi
 mova [rsp+mmsize*10], m4
 mova [rsp+mmsize*11], m5
 
+%if ARCH_X86_64
 movu m11, [ana_matrix_bq+ 0]
 movq m13, [ana_matrix_bq+16]
 pshufd m8, m11, q0000
@@ -84,6 +106,26 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesi
 pshufd   m13, m13, q1111
 mov   widthd, dword widthm
 mov  heightd, dword heightm
+%else
+movu  m3, [ana_matrix_bq+ 0]
+movq  m5, [ana_matrix_bq+16]
+pshufd m0, m3, q0000
+pshufd m1, m3, q1111
+pshufd m2, m3, q2222
+pshufd m3, m3, q3333
+pshufd m4, m5, q0000
+pshufd m5, m5, q1111
+mova [rsp+mmsize*12], m0
+mova [rsp+mmsize*13], m1
+mova [rsp+mmsize*14], m2
+mova [rsp+mmsize*15], m3
+mova [rsp+mmsize*16], m4
+mova [rsp+mmsize*17], m5
+mov dst_linesizeq, r3m
+%if HAVE_ALIGNED_STACK
+mov  l_linesizeq, r4m
+%endif
+%endif ; ARCH
 
 .nextrow:
 mov   od, widthd
@@ -172,4 +214,3 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesi
 sub   heightd, 1
 jg .nextrow
 REP_RET
-%endif
diff --git a/libavfilter/x86/vf_stereo3d_init.c b/libavfilter/x86/vf_stereo3d_init.c
index 77d4f7b..da160a8 100644
--- a/libavfilter/x86/vf_stereo3d_init.c
+++ b/libavfilter/x86/vf_stereo3d_init.c
@@ -31,7 +31,7 @@ void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) {
+if (EXTERNAL_SSE4(cpu_flags)) {
 dsp->anaglyph = ff_anaglyph_sse4;
 }
 }
-- 
2.6.3

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel