Re: [FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version
> -Original Message- > From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf Of > James Almer > Sent: Tuesday, September 17, 2019 09:56 PM > To: ffmpeg-devel@ffmpeg.org > Subject: Re: [FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version > > On 9/17/2019 10:39 AM, Ting Fu wrote: > > Signed-off-by: Ting Fu > > --- > > libavfilter/x86/vf_eq.asm| 19 +-- > > libavfilter/x86/vf_eq_init.c | 20 > > 2 files changed, 37 insertions(+), 2 deletions(-) > > > > diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm > > index bf28691297..d6b51cf6df 100644 > > --- a/libavfilter/x86/vf_eq.asm > > +++ b/libavfilter/x86/vf_eq.asm > > @@ -24,14 +24,21 @@ > > > > SECTION .text > > > > -INIT_MMX mmx > > +%macro PROCESS_ONE_LINE 1 > > cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w > > movd m3, contrastd > > movd m4, brightnessd > > movsx r5d, contrastw > > movsx r6d, brightnessw > > +%if mmsize == 8 > > pshufw m3, m3, 0 > > pshufw m4, m4, 0 > > pshufw isn't mmx, but mmxext (AKA, integer SSE). Hardly a problem since i > don't > think anyone using a Pentium 2 will use this filter, but still it's > technically wrong. > > The function should be changed into mmxext to reflect the above. > Thank you for your reviw. It's ture that pshufw belongs to mmxext, and I have updated the related codes. > > +%elif mmsize == 16 > > +pshuflw m3, m3, 0 > > +movlhps m3, m3 > > +pshuflw m4, m4, 0 > > +movlhps m4, m4 > > +%endif > > You can use the SPLATW macro instead. It will expand to the correct > instructions > based on what set is enabled. Yes, the SPLATW macro can totally replace the instructions above. Sorry, I didn't know before. And I have replaced above codes with SPLATW in PATCH V2. > > > > > DEFINE_ARGS src, dst, tmp, scalar, w > > xor tmpd, tmpd > > @@ -39,7 +46,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, > brightness, w > > pxor m1, m1 > > mov scalard, wd > > and scalard, mmsize-1 > > -sar wd, 3 > > +sar wd, %1 > > cmp wd, 1 > > jl .loop1 > > > > @@ -80,3 +87,11 @@ cglobal process_one_line, 5, 7, 5, src, dst, > > contrast, brightness, w > > > > .end: > > RET > > + > > +%endmacro > > + > > +INIT_MMX mmx > > +PROCESS_ONE_LINE 3 > > + > > +INIT_XMM sse2 > > +PROCESS_ONE_LINE 4 > > diff --git a/libavfilter/x86/vf_eq_init.c > > b/libavfilter/x86/vf_eq_init.c index 63c69078fb..cdd5272220 100644 > > --- a/libavfilter/x86/vf_eq_init.c > > +++ b/libavfilter/x86/vf_eq_init.c > > @@ -28,6 +28,8 @@ > > > > extern void ff_process_one_line_mmx(const uint8_t *src, uint8_t *dst, int > contvec, > > int brvec, int w); > > +extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, int > contvec, > > +int brvec, int w); > > > > static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride, > > const uint8_t *src, int src_stride, int w, > > int h) @@ -44,6 +46,21 @@ static void process_mmx(EQParameters *param, > uint8_t *dst, int dst_stride, > > emms_c(); > > } > > > > +static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride, > > +const uint8_t *src, int src_stride, int w, > > +int h) { > > +short contrast = (short) (param->contrast * 256 * 16); > > +short brightness = ((short) (100.0 * param->brightness + 100.0) * 511) > > + / 200 - 128 - contrast / 32; > > + > > +while (h--) { > > +ff_process_one_line_sse2(src, dst, contrast, brightness, w); > > +src += src_stride; > > +dst += dst_stride; > > +} > > +emms_c(); > > No need for this since the SSE2 version uses xmm regs. > Of course the xmm registers no longer need the emms_c(), my mistake. Thank for your review again and I have post PATH V2 to ffmpeg-devel. It's welcome to take a further look. > > +} > > + > > av_cold void ff_eq_init_x86(EQContext *eq) { > > int cpu_flags = av_get_cpu_flags(); @@ -51,5 +68,8 @@ av_cold > > void ff_eq_init_x86(EQContext *eq) > > if (cpu_flags & AV_CPU_FLAG_MMX) { > > eq->process = process_mmx; > > } > > +if (cpu_flags & AV_CPU_FLAG_SSE2) { > > +eq->process = process_sse2; > > +} > > } > > > > > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org > with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version
On 9/17/2019 10:39 AM, Ting Fu wrote: > Signed-off-by: Ting Fu > --- > libavfilter/x86/vf_eq.asm| 19 +-- > libavfilter/x86/vf_eq_init.c | 20 > 2 files changed, 37 insertions(+), 2 deletions(-) > > diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm > index bf28691297..d6b51cf6df 100644 > --- a/libavfilter/x86/vf_eq.asm > +++ b/libavfilter/x86/vf_eq.asm > @@ -24,14 +24,21 @@ > > SECTION .text > > -INIT_MMX mmx > +%macro PROCESS_ONE_LINE 1 > cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w > movd m3, contrastd > movd m4, brightnessd > movsx r5d, contrastw > movsx r6d, brightnessw > +%if mmsize == 8 > pshufw m3, m3, 0 > pshufw m4, m4, 0 pshufw isn't mmx, but mmxext (AKA, integer SSE). Hardly a problem since i don't think anyone using a Pentium 2 will use this filter, but still it's technically wrong. The function should be changed into mmxext to reflect the above. > +%elif mmsize == 16 > +pshuflw m3, m3, 0 > +movlhps m3, m3 > +pshuflw m4, m4, 0 > +movlhps m4, m4 > +%endif You can use the SPLATW macro instead. It will expand to the correct instructions based on what set is enabled. > > DEFINE_ARGS src, dst, tmp, scalar, w > xor tmpd, tmpd > @@ -39,7 +46,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, > brightness, w > pxor m1, m1 > mov scalard, wd > and scalard, mmsize-1 > -sar wd, 3 > +sar wd, %1 > cmp wd, 1 > jl .loop1 > > @@ -80,3 +87,11 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, > brightness, w > > .end: > RET > + > +%endmacro > + > +INIT_MMX mmx > +PROCESS_ONE_LINE 3 > + > +INIT_XMM sse2 > +PROCESS_ONE_LINE 4 > diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c > index 63c69078fb..cdd5272220 100644 > --- a/libavfilter/x86/vf_eq_init.c > +++ b/libavfilter/x86/vf_eq_init.c > @@ -28,6 +28,8 @@ > > extern void ff_process_one_line_mmx(const uint8_t *src, uint8_t *dst, int > contvec, > int brvec, int w); > +extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, int > contvec, > +int brvec, int w); > > static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride, > const uint8_t *src, int src_stride, int w, int h) > @@ -44,6 +46,21 @@ static void process_mmx(EQParameters *param, uint8_t *dst, > int dst_stride, > emms_c(); > } > > +static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride, > +const uint8_t *src, int src_stride, int w, int h) > +{ > +short contrast = (short) (param->contrast * 256 * 16); > +short brightness = ((short) (100.0 * param->brightness + 100.0) * 511) > + / 200 - 128 - contrast / 32; > + > +while (h--) { > +ff_process_one_line_sse2(src, dst, contrast, brightness, w); > +src += src_stride; > +dst += dst_stride; > +} > +emms_c(); No need for this since the SSE2 version uses xmm regs. > +} > + > av_cold void ff_eq_init_x86(EQContext *eq) > { > int cpu_flags = av_get_cpu_flags(); > @@ -51,5 +68,8 @@ av_cold void ff_eq_init_x86(EQContext *eq) > if (cpu_flags & AV_CPU_FLAG_MMX) { > eq->process = process_mmx; > } > +if (cpu_flags & AV_CPU_FLAG_SSE2) { > +eq->process = process_sse2; > +} > } > > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version
Signed-off-by: Ting Fu --- libavfilter/x86/vf_eq.asm| 19 +-- libavfilter/x86/vf_eq_init.c | 20 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm index bf28691297..d6b51cf6df 100644 --- a/libavfilter/x86/vf_eq.asm +++ b/libavfilter/x86/vf_eq.asm @@ -24,14 +24,21 @@ SECTION .text -INIT_MMX mmx +%macro PROCESS_ONE_LINE 1 cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w movd m3, contrastd movd m4, brightnessd movsx r5d, contrastw movsx r6d, brightnessw +%if mmsize == 8 pshufw m3, m3, 0 pshufw m4, m4, 0 +%elif mmsize == 16 +pshuflw m3, m3, 0 +movlhps m3, m3 +pshuflw m4, m4, 0 +movlhps m4, m4 +%endif DEFINE_ARGS src, dst, tmp, scalar, w xor tmpd, tmpd @@ -39,7 +46,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w pxor m1, m1 mov scalard, wd and scalard, mmsize-1 -sar wd, 3 +sar wd, %1 cmp wd, 1 jl .loop1 @@ -80,3 +87,11 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w .end: RET + +%endmacro + +INIT_MMX mmx +PROCESS_ONE_LINE 3 + +INIT_XMM sse2 +PROCESS_ONE_LINE 4 diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c index 63c69078fb..cdd5272220 100644 --- a/libavfilter/x86/vf_eq_init.c +++ b/libavfilter/x86/vf_eq_init.c @@ -28,6 +28,8 @@ extern void ff_process_one_line_mmx(const uint8_t *src, uint8_t *dst, int contvec, int brvec, int w); +extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, int contvec, +int brvec, int w); static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int w, int h) @@ -44,6 +46,21 @@ static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride, emms_c(); } +static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride, +const uint8_t *src, int src_stride, int w, int h) +{ +short contrast = (short) (param->contrast * 256 * 16); +short brightness = ((short) (100.0 * param->brightness + 100.0) * 511) + / 200 - 128 - contrast / 32; + +while (h--) { +ff_process_one_line_sse2(src, dst, contrast, brightness, w); +src += src_stride; +dst += dst_stride; +} +emms_c(); +} + av_cold void ff_eq_init_x86(EQContext *eq) { int cpu_flags = av_get_cpu_flags(); @@ -51,5 +68,8 @@ av_cold void ff_eq_init_x86(EQContext *eq) if (cpu_flags & AV_CPU_FLAG_MMX) { eq->process = process_mmx; } +if (cpu_flags & AV_CPU_FLAG_SSE2) { +eq->process = process_sse2; +} } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".