Re: [FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version

2019-09-18 Thread Fu, Ting


> -Original Message-
> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf Of
> James Almer
> Sent: Tuesday, September 17, 2019 09:56 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version
> 
> On 9/17/2019 10:39 AM, Ting Fu wrote:
> > Signed-off-by: Ting Fu 
> > ---
> >  libavfilter/x86/vf_eq.asm| 19 +--
> >  libavfilter/x86/vf_eq_init.c | 20 
> >  2 files changed, 37 insertions(+), 2 deletions(-)
> >
> > diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm
> > index bf28691297..d6b51cf6df 100644
> > --- a/libavfilter/x86/vf_eq.asm
> > +++ b/libavfilter/x86/vf_eq.asm
> > @@ -24,14 +24,21 @@
> >
> >  SECTION .text
> >
> > -INIT_MMX mmx
> > +%macro PROCESS_ONE_LINE 1
> >  cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w
> >  movd m3, contrastd
> >  movd m4, brightnessd
> >  movsx r5d, contrastw
> >  movsx r6d, brightnessw
> > +%if mmsize == 8
> >  pshufw m3, m3, 0
> >  pshufw m4, m4, 0
> 
> pshufw isn't mmx, but mmxext (AKA, integer SSE). Hardly a problem since i 
> don't
> think anyone using a Pentium 2 will use this filter, but still it's 
> technically wrong.
> 
> The function should be changed into mmxext to reflect the above.
> 

Thank you for your reviw. It's ture that pshufw belongs to mmxext, and I have 
updated the related codes.

> > +%elif mmsize == 16
> > +pshuflw m3, m3, 0
> > +movlhps m3, m3
> > +pshuflw m4, m4, 0
> > +movlhps m4, m4
> > +%endif
> 
> You can use the SPLATW macro instead. It will expand to the correct 
> instructions
> based on what set is enabled.

Yes, the SPLATW macro can totally replace the instructions above. Sorry, I 
didn't know before.
And I have replaced above codes with SPLATW in PATCH V2.

> 
> >
> >  DEFINE_ARGS src, dst, tmp, scalar, w
> >  xor tmpd, tmpd
> > @@ -39,7 +46,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast,
> brightness, w
> >  pxor m1, m1
> >  mov scalard, wd
> >  and scalard, mmsize-1
> > -sar wd, 3
> > +sar wd, %1
> >  cmp wd, 1
> >  jl .loop1
> >
> > @@ -80,3 +87,11 @@ cglobal process_one_line, 5, 7, 5, src, dst,
> > contrast, brightness, w
> >
> >  .end:
> >  RET
> > +
> > +%endmacro
> > +
> > +INIT_MMX mmx
> > +PROCESS_ONE_LINE 3
> > +
> > +INIT_XMM sse2
> > +PROCESS_ONE_LINE 4
> > diff --git a/libavfilter/x86/vf_eq_init.c
> > b/libavfilter/x86/vf_eq_init.c index 63c69078fb..cdd5272220 100644
> > --- a/libavfilter/x86/vf_eq_init.c
> > +++ b/libavfilter/x86/vf_eq_init.c
> > @@ -28,6 +28,8 @@
> >
> >  extern void ff_process_one_line_mmx(const uint8_t *src, uint8_t *dst, int
> contvec,
> >  int brvec, int w);
> > +extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, int
> contvec,
> > +int brvec, int w);
> >
> >  static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride,
> >  const uint8_t *src, int src_stride, int w,
> > int h) @@ -44,6 +46,21 @@ static void process_mmx(EQParameters *param,
> uint8_t *dst, int dst_stride,
> >  emms_c();
> >  }
> >
> > +static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride,
> > +const uint8_t *src, int src_stride, int w,
> > +int h) {
> > +short contrast = (short) (param->contrast * 256 * 16);
> > +short brightness = ((short) (100.0 * param->brightness + 100.0) * 511)
> > +   / 200 - 128 - contrast / 32;
> > +
> > +while (h--) {
> > +ff_process_one_line_sse2(src, dst, contrast, brightness, w);
> > +src += src_stride;
> > +dst += dst_stride;
> > +}
> > +emms_c();
> 
> No need for this since the SSE2 version uses xmm regs.
>

Of course the xmm registers no longer need the emms_c(), my mistake.
Thank for your review again and I have post PATH V2 to ffmpeg-devel.
It's welcome to take a further look.
 
> > +}
> > +
> >  av_cold void ff_eq_init_x86(EQContext *eq)  {
> >  int cpu_flags = av_get_cpu_flags(); @@ -51,5 +68,8 @@ av_cold
> > void ff_eq_init_x86(EQContext *eq)
> >  if (cpu_flags & AV_CPU_FLAG_MMX) {
> >  eq->process = process_mmx;
> >  }
> > +if (cpu_flags & AV_CPU_FLAG_SSE2) {
> > +eq->process = process_sse2;
> > +}
> >  }
> >
> >
> 
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org
> with subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version

2019-09-17 Thread James Almer
On 9/17/2019 10:39 AM, Ting Fu wrote:
> Signed-off-by: Ting Fu 
> ---
>  libavfilter/x86/vf_eq.asm| 19 +--
>  libavfilter/x86/vf_eq_init.c | 20 
>  2 files changed, 37 insertions(+), 2 deletions(-)
> 
> diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm
> index bf28691297..d6b51cf6df 100644
> --- a/libavfilter/x86/vf_eq.asm
> +++ b/libavfilter/x86/vf_eq.asm
> @@ -24,14 +24,21 @@
>  
>  SECTION .text
>  
> -INIT_MMX mmx
> +%macro PROCESS_ONE_LINE 1
>  cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w
>  movd m3, contrastd
>  movd m4, brightnessd
>  movsx r5d, contrastw
>  movsx r6d, brightnessw
> +%if mmsize == 8
>  pshufw m3, m3, 0
>  pshufw m4, m4, 0

pshufw isn't mmx, but mmxext (AKA, integer SSE). Hardly a problem since
i don't think anyone using a Pentium 2 will use this filter, but still
it's technically wrong.

The function should be changed into mmxext to reflect the above.

> +%elif mmsize == 16
> +pshuflw m3, m3, 0
> +movlhps m3, m3
> +pshuflw m4, m4, 0
> +movlhps m4, m4
> +%endif

You can use the SPLATW macro instead. It will expand to the correct
instructions based on what set is enabled.

>  
>  DEFINE_ARGS src, dst, tmp, scalar, w
>  xor tmpd, tmpd
> @@ -39,7 +46,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
> brightness, w
>  pxor m1, m1
>  mov scalard, wd
>  and scalard, mmsize-1
> -sar wd, 3
> +sar wd, %1
>  cmp wd, 1
>  jl .loop1
>  
> @@ -80,3 +87,11 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
> brightness, w
>  
>  .end:
>  RET
> +
> +%endmacro
> +
> +INIT_MMX mmx
> +PROCESS_ONE_LINE 3
> +
> +INIT_XMM sse2
> +PROCESS_ONE_LINE 4
> diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c
> index 63c69078fb..cdd5272220 100644
> --- a/libavfilter/x86/vf_eq_init.c
> +++ b/libavfilter/x86/vf_eq_init.c
> @@ -28,6 +28,8 @@
>  
>  extern void ff_process_one_line_mmx(const uint8_t *src, uint8_t *dst, int 
> contvec,
>  int brvec, int w);
> +extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, int 
> contvec,
> +int brvec, int w);
>  
>  static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride,
>  const uint8_t *src, int src_stride, int w, int h)
> @@ -44,6 +46,21 @@ static void process_mmx(EQParameters *param, uint8_t *dst, 
> int dst_stride,
>  emms_c();
>  }
>  
> +static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride,
> +const uint8_t *src, int src_stride, int w, int h)
> +{
> +short contrast = (short) (param->contrast * 256 * 16);
> +short brightness = ((short) (100.0 * param->brightness + 100.0) * 511)
> +   / 200 - 128 - contrast / 32;
> +
> +while (h--) {
> +ff_process_one_line_sse2(src, dst, contrast, brightness, w);
> +src += src_stride;
> +dst += dst_stride;
> +}
> +emms_c();

No need for this since the SSE2 version uses xmm regs.

> +}
> +
>  av_cold void ff_eq_init_x86(EQContext *eq)
>  {
>  int cpu_flags = av_get_cpu_flags();
> @@ -51,5 +68,8 @@ av_cold void ff_eq_init_x86(EQContext *eq)
>  if (cpu_flags & AV_CPU_FLAG_MMX) {
>  eq->process = process_mmx;
>  }
> +if (cpu_flags & AV_CPU_FLAG_SSE2) {
> +eq->process = process_sse2;
> +}
>  }
>  
> 

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version

2019-09-17 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/x86/vf_eq.asm| 19 +--
 libavfilter/x86/vf_eq_init.c | 20 
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm
index bf28691297..d6b51cf6df 100644
--- a/libavfilter/x86/vf_eq.asm
+++ b/libavfilter/x86/vf_eq.asm
@@ -24,14 +24,21 @@
 
 SECTION .text
 
-INIT_MMX mmx
+%macro PROCESS_ONE_LINE 1
 cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w
 movd m3, contrastd
 movd m4, brightnessd
 movsx r5d, contrastw
 movsx r6d, brightnessw
+%if mmsize == 8
 pshufw m3, m3, 0
 pshufw m4, m4, 0
+%elif mmsize == 16
+pshuflw m3, m3, 0
+movlhps m3, m3
+pshuflw m4, m4, 0
+movlhps m4, m4
+%endif
 
 DEFINE_ARGS src, dst, tmp, scalar, w
 xor tmpd, tmpd
@@ -39,7 +46,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
brightness, w
 pxor m1, m1
 mov scalard, wd
 and scalard, mmsize-1
-sar wd, 3
+sar wd, %1
 cmp wd, 1
 jl .loop1
 
@@ -80,3 +87,11 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
brightness, w
 
 .end:
 RET
+
+%endmacro
+
+INIT_MMX mmx
+PROCESS_ONE_LINE 3
+
+INIT_XMM sse2
+PROCESS_ONE_LINE 4
diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c
index 63c69078fb..cdd5272220 100644
--- a/libavfilter/x86/vf_eq_init.c
+++ b/libavfilter/x86/vf_eq_init.c
@@ -28,6 +28,8 @@
 
 extern void ff_process_one_line_mmx(const uint8_t *src, uint8_t *dst, int 
contvec,
 int brvec, int w);
+extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, int 
contvec,
+int brvec, int w);
 
 static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride,
 const uint8_t *src, int src_stride, int w, int h)
@@ -44,6 +46,21 @@ static void process_mmx(EQParameters *param, uint8_t *dst, 
int dst_stride,
 emms_c();
 }
 
+static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride,
+const uint8_t *src, int src_stride, int w, int h)
+{
+short contrast = (short) (param->contrast * 256 * 16);
+short brightness = ((short) (100.0 * param->brightness + 100.0) * 511)
+   / 200 - 128 - contrast / 32;
+
+while (h--) {
+ff_process_one_line_sse2(src, dst, contrast, brightness, w);
+src += src_stride;
+dst += dst_stride;
+}
+emms_c();
+}
+
 av_cold void ff_eq_init_x86(EQContext *eq)
 {
 int cpu_flags = av_get_cpu_flags();
@@ -51,5 +68,8 @@ av_cold void ff_eq_init_x86(EQContext *eq)
 if (cpu_flags & AV_CPU_FLAG_MMX) {
 eq->process = process_mmx;
 }
+if (cpu_flags & AV_CPU_FLAG_SSE2) {
+eq->process = process_sse2;
+}
 }
 
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".