Re: [FFmpeg-devel] [PATCH 2/2] x86/vf_stereo3d: make ff_anaglyph_sse4 work on x86_32
On 12/27/15, James Almer wrote: > Signed-off-by: James Almer > --- > libavfilter/x86/vf_stereo3d.asm | 47 > +++--- > libavfilter/x86/vf_stereo3d_init.c | 2 +- > 2 files changed, 45 insertions(+), 4 deletions(-) > > diff --git a/libavfilter/x86/vf_stereo3d.asm > b/libavfilter/x86/vf_stereo3d.asm > index 29a8c56..491579f 100644 > --- a/libavfilter/x86/vf_stereo3d.asm > +++ b/libavfilter/x86/vf_stereo3d.asm > @@ -22,8 +22,6 @@ > > %include "libavutil/x86/x86util.asm" > > -%if ARCH_X86_64 > - > SECTION_RODATA > > ; rgbrgbrgbrgb > @@ -37,10 +35,33 @@ ex_b: db 2,-1,-1,-1,5,-1,-1,-1,8,-1,-1,-1,11,-1,-1,-1 > SECTION .text > > INIT_XMM sse4 > +%if ARCH_X86_64 > cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, > l_linesize, r_linesize, width, height, o, cnt > %define ana_matrix_rq r6q > %define ana_matrix_gq r7q > %define ana_matrix_bq r8q > + > +%else ; ARCH_X86_32 > +%if HAVE_ALIGNED_STACK > +cglobal anaglyph, 3, 7, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, > l_linesize, o, cnt > +%else > +cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o, > cnt > +%define l_linesizeq r4mp > +%endif ; HAVE_ALIGNED_STACK > +%define ana_matrix_rq r3q > +%define ana_matrix_gq r4q > +%define ana_matrix_bq r5q > +%define r_linesizeq r5mp > +%define widthd r6mp > +%define heightd r7mp > +%define m8 [rsp+mmsize*12] > +%define m9 [rsp+mmsize*13] > +%define m10 [rsp+mmsize*14] > +%define m11 [rsp+mmsize*15] > +%define m12 [rsp+mmsize*16] > +%define m13 [rsp+mmsize*17] > +%endif ; ARCH > + > mov ana_matrix_rq, r8m > mov ana_matrix_gq, r9m > mov ana_matrix_bq, r10m > @@ -74,6 +95,7 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, > dst_linesize, l_linesi > mova [rsp+mmsize*10], m4 > mova [rsp+mmsize*11], m5 > > +%if ARCH_X86_64 > movu m11, [ana_matrix_bq+ 0] > movq m13, [ana_matrix_bq+16] > pshufd m8, m11, q0000 > @@ -84,6 +106,26 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, > rsrc, dst_linesize, l_linesi > pshufd m13, m13, q1111 > mov widthd, 
dword widthm > mov heightd, dword heightm > +%else > +movu m3, [ana_matrix_bq+ 0] > +movq m5, [ana_matrix_bq+16] > +pshufd m0, m3, q0000 > +pshufd m1, m3, q1111 > +pshufd m2, m3, q2222 > +pshufd m3, m3, q3333 > +pshufd m4, m5, q0000 > +pshufd m5, m5, q1111 > +mova [rsp+mmsize*12], m0 > +mova [rsp+mmsize*13], m1 > +mova [rsp+mmsize*14], m2 > +mova [rsp+mmsize*15], m3 > +mova [rsp+mmsize*16], m4 > +mova [rsp+mmsize*17], m5 > +mov dst_linesizeq, r3m > +%if HAVE_ALIGNED_STACK > +mov l_linesizeq, r4m > +%endif > +%endif ; ARCH > > .nextrow: > mov od, widthd > @@ -172,4 +214,3 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, > rsrc, dst_linesize, l_linesi > sub heightd, 1 > jg .nextrow > REP_RET > -%endif > diff --git a/libavfilter/x86/vf_stereo3d_init.c > b/libavfilter/x86/vf_stereo3d_init.c > index 77d4f7b..da160a8 100644 > --- a/libavfilter/x86/vf_stereo3d_init.c > +++ b/libavfilter/x86/vf_stereo3d_init.c > @@ -31,7 +31,7 @@ void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp) > { > int cpu_flags = av_get_cpu_flags(); > > -if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) { > +if (EXTERNAL_SSE4(cpu_flags)) { > dsp->anaglyph = ff_anaglyph_sse4; > } > } > -- > 2.6.3 > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > both patches ok if fate is not broken by this ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/2] x86/vf_stereo3d: make ff_anaglyph_sse4 work on x86_32
On 12/28/2015 5:15 AM, Paul B Mahol wrote: > On 12/27/15, James Almer wrote: >> Signed-off-by: James Almer >> --- >> libavfilter/x86/vf_stereo3d.asm | 47 >> +++--- >> libavfilter/x86/vf_stereo3d_init.c | 2 +- >> 2 files changed, 45 insertions(+), 4 deletions(-) >> >> diff --git a/libavfilter/x86/vf_stereo3d.asm >> b/libavfilter/x86/vf_stereo3d.asm >> index 29a8c56..491579f 100644 >> --- a/libavfilter/x86/vf_stereo3d.asm >> +++ b/libavfilter/x86/vf_stereo3d.asm >> @@ -22,8 +22,6 @@ >> >> %include "libavutil/x86/x86util.asm" >> >> -%if ARCH_X86_64 >> - >> SECTION_RODATA >> >> ; rgbrgbrgbrgb >> @@ -37,10 +35,33 @@ ex_b: db 2,-1,-1,-1,5,-1,-1,-1,8,-1,-1,-1,11,-1,-1,-1 >> SECTION .text >> >> INIT_XMM sse4 >> +%if ARCH_X86_64 >> cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, >> l_linesize, r_linesize, width, height, o, cnt >> %define ana_matrix_rq r6q >> %define ana_matrix_gq r7q >> %define ana_matrix_bq r8q >> + >> +%else ; ARCH_X86_32 >> +%if HAVE_ALIGNED_STACK >> +cglobal anaglyph, 3, 7, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, >> l_linesize, o, cnt >> +%else >> +cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o, >> cnt >> +%define l_linesizeq r4mp >> +%endif ; HAVE_ALIGNED_STACK >> +%define ana_matrix_rq r3q >> +%define ana_matrix_gq r4q >> +%define ana_matrix_bq r5q >> +%define r_linesizeq r5mp >> +%define widthd r6mp >> +%define heightd r7mp >> +%define m8 [rsp+mmsize*12] >> +%define m9 [rsp+mmsize*13] >> +%define m10 [rsp+mmsize*14] >> +%define m11 [rsp+mmsize*15] >> +%define m12 [rsp+mmsize*16] >> +%define m13 [rsp+mmsize*17] >> +%endif ; ARCH >> + >> mov ana_matrix_rq, r8m >> mov ana_matrix_gq, r9m >> mov ana_matrix_bq, r10m >> @@ -74,6 +95,7 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, >> dst_linesize, l_linesi >> mova [rsp+mmsize*10], m4 >> mova [rsp+mmsize*11], m5 >> >> +%if ARCH_X86_64 >> movu m11, [ana_matrix_bq+ 0] >> movq m13, [ana_matrix_bq+16] >> pshufd m8, m11, q0000 >> @@ -84,6 +106,26 @@ 
cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, >> rsrc, dst_linesize, l_linesi >> pshufd m13, m13, q1111 >> mov widthd, dword widthm >> mov heightd, dword heightm >> +%else >> +movu m3, [ana_matrix_bq+ 0] >> +movq m5, [ana_matrix_bq+16] >> +pshufd m0, m3, q0000 >> +pshufd m1, m3, q1111 >> +pshufd m2, m3, q2222 >> +pshufd m3, m3, q3333 >> +pshufd m4, m5, q0000 >> +pshufd m5, m5, q1111 >> +mova [rsp+mmsize*12], m0 >> +mova [rsp+mmsize*13], m1 >> +mova [rsp+mmsize*14], m2 >> +mova [rsp+mmsize*15], m3 >> +mova [rsp+mmsize*16], m4 >> +mova [rsp+mmsize*17], m5 >> +mov dst_linesizeq, r3m >> +%if HAVE_ALIGNED_STACK >> +mov l_linesizeq, r4m >> +%endif >> +%endif ; ARCH >> >> .nextrow: >> mov od, widthd >> @@ -172,4 +214,3 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, >> rsrc, dst_linesize, l_linesi >> sub heightd, 1 >> jg .nextrow >> REP_RET >> -%endif >> diff --git a/libavfilter/x86/vf_stereo3d_init.c >> b/libavfilter/x86/vf_stereo3d_init.c >> index 77d4f7b..da160a8 100644 >> --- a/libavfilter/x86/vf_stereo3d_init.c >> +++ b/libavfilter/x86/vf_stereo3d_init.c >> @@ -31,7 +31,7 @@ void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp) >> { >> int cpu_flags = av_get_cpu_flags(); >> >> -if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) { >> +if (EXTERNAL_SSE4(cpu_flags)) { >> dsp->anaglyph = ff_anaglyph_sse4; >> } >> } >> -- >> 2.6.3 >> >> ___ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >> > > both patches ok if fate is not broken by this Pushed then, thanks. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/2] x86/vf_stereo3d: make ff_anaglyph_sse4 work on x86_32
Signed-off-by: James Almer --- libavfilter/x86/vf_stereo3d.asm | 47 +++--- libavfilter/x86/vf_stereo3d_init.c | 2 +- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/libavfilter/x86/vf_stereo3d.asm b/libavfilter/x86/vf_stereo3d.asm index 29a8c56..491579f 100644 --- a/libavfilter/x86/vf_stereo3d.asm +++ b/libavfilter/x86/vf_stereo3d.asm @@ -22,8 +22,6 @@ %include "libavutil/x86/x86util.asm" -%if ARCH_X86_64 - SECTION_RODATA ; rgbrgbrgbrgb @@ -37,10 +35,33 @@ ex_b: db 2,-1,-1,-1,5,-1,-1,-1,8,-1,-1,-1,11,-1,-1,-1 SECTION .text INIT_XMM sse4 +%if ARCH_X86_64 cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesize, r_linesize, width, height, o, cnt %define ana_matrix_rq r6q %define ana_matrix_gq r7q %define ana_matrix_bq r8q + +%else ; ARCH_X86_32 +%if HAVE_ALIGNED_STACK +cglobal anaglyph, 3, 7, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesize, o, cnt +%else +cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o, cnt +%define l_linesizeq r4mp +%endif ; HAVE_ALIGNED_STACK +%define ana_matrix_rq r3q +%define ana_matrix_gq r4q +%define ana_matrix_bq r5q +%define r_linesizeq r5mp +%define widthd r6mp +%define heightd r7mp +%define m8 [rsp+mmsize*12] +%define m9 [rsp+mmsize*13] +%define m10 [rsp+mmsize*14] +%define m11 [rsp+mmsize*15] +%define m12 [rsp+mmsize*16] +%define m13 [rsp+mmsize*17] +%endif ; ARCH + mov ana_matrix_rq, r8m mov ana_matrix_gq, r9m mov ana_matrix_bq, r10m @@ -74,6 +95,7 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesi mova [rsp+mmsize*10], m4 mova [rsp+mmsize*11], m5 +%if ARCH_X86_64 movu m11, [ana_matrix_bq+ 0] movq m13, [ana_matrix_bq+16] pshufd m8, m11, q0000 @@ -84,6 +106,26 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesi pshufd m13, m13, q1111 mov widthd, dword widthm mov heightd, dword heightm +%else +movu m3, [ana_matrix_bq+ 0] +movq m5, [ana_matrix_bq+16] +pshufd m0, m3, q0000 +pshufd m1, m3, q1111 +pshufd m2, m3, q2222 +pshufd m3, m3, q3333 
+pshufd m4, m5, q0000 +pshufd m5, m5, q1111 +mova [rsp+mmsize*12], m0 +mova [rsp+mmsize*13], m1 +mova [rsp+mmsize*14], m2 +mova [rsp+mmsize*15], m3 +mova [rsp+mmsize*16], m4 +mova [rsp+mmsize*17], m5 +mov dst_linesizeq, r3m +%if HAVE_ALIGNED_STACK +mov l_linesizeq, r4m +%endif +%endif ; ARCH .nextrow: mov od, widthd @@ -172,4 +214,3 @@ cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesi sub heightd, 1 jg .nextrow REP_RET -%endif diff --git a/libavfilter/x86/vf_stereo3d_init.c b/libavfilter/x86/vf_stereo3d_init.c index 77d4f7b..da160a8 100644 --- a/libavfilter/x86/vf_stereo3d_init.c +++ b/libavfilter/x86/vf_stereo3d_init.c @@ -31,7 +31,7 @@ void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp) { int cpu_flags = av_get_cpu_flags(); -if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) { +if (EXTERNAL_SSE4(cpu_flags)) { dsp->anaglyph = ff_anaglyph_sse4; } } -- 2.6.3 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel