On 2012-07-01 13:08:47 +0100, Måns Rullgård wrote:
> Signed-off-by: Mans Rullgard <[email protected]>
> ---
>  libavfilter/gradfun.h     |    7 ++-----
>  libavfilter/vf_gradfun.c  |    9 ++-------
>  libavfilter/x86/gradfun.c |   29 +++++++++++++++++++++--------
>  3 files changed, 25 insertions(+), 20 deletions(-)
> 
> diff --git a/libavfilter/gradfun.h b/libavfilter/gradfun.h
> index 6b192a3..5d01130 100644
> --- a/libavfilter/gradfun.h
> +++ b/libavfilter/gradfun.h
> @@ -37,12 +37,9 @@ typedef struct {
>      void (*blur_line) (uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t 
> *src, int src_linesize, int width);
>  } GradFunContext;
>  
> +void ff_gradfun_init_x86(GradFunContext *gf);
> +
>  void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int 
> width, int thresh, const uint16_t *dithers);
>  void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, 
> uint8_t *src, int src_linesize, int width);
>  
> -void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, 
> int width, int thresh, const uint16_t *dithers);
> -void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, 
> int width, int thresh, const uint16_t *dithers);
> -
> -void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, 
> uint8_t *src, int src_linesize, int width);
> -
>  #endif /* AVFILTER_GRADFUN_H */
> diff --git a/libavfilter/vf_gradfun.c b/libavfilter/vf_gradfun.c
> index 12977d9..71749fe 100644
> --- a/libavfilter/vf_gradfun.c
> +++ b/libavfilter/vf_gradfun.c
> @@ -123,7 +123,6 @@ static av_cold int init(AVFilterContext *ctx, const char 
> *args)
>      GradFunContext *gf = ctx->priv;
>      float thresh = 1.2;
>      int radius = 16;
> -    int cpu_flags = av_get_cpu_flags();
>  
>      if (args)
>          sscanf(args, "%f:%d", &thresh, &radius);
> @@ -135,12 +134,8 @@ static av_cold int init(AVFilterContext *ctx, const char 
> *args)
>      gf->blur_line = ff_gradfun_blur_line_c;
>      gf->filter_line = ff_gradfun_filter_line_c;
>  
> -    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX2)
> -        gf->filter_line = ff_gradfun_filter_line_mmx2;
> -    if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
> -        gf->filter_line = ff_gradfun_filter_line_ssse3;
> -    if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
> -        gf->blur_line = ff_gradfun_blur_line_sse2;
> +    if (HAVE_MMX)
> +        ff_gradfun_init_x86(gf);
>  
>      av_log(ctx, AV_LOG_VERBOSE, "threshold:%.2f radius:%d\n", thresh, 
> gf->radius);
>  
> diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c
> index ff3b19d..79d6617 100644
> --- a/libavfilter/x86/gradfun.c
> +++ b/libavfilter/x86/gradfun.c
> @@ -18,6 +18,7 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
>   */
>  
> +#include "libavutil/attributes.h"
>  #include "libavutil/cpu.h"
>  #include "libavutil/x86_cpu.h"
>  #include "libavfilter/gradfun.h"
> @@ -25,9 +26,9 @@
>  DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = 
> {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
>  DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = 
> {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
>  
> -void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, 
> int width, int thresh, const uint16_t *dithers)
> -{
>  #if HAVE_MMX
> +static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t 
> *dc, int width, int thresh, const uint16_t *dithers)

We have HAVE_MMX2, if you would like to fix this inconsistency (an mmx2
function guarded by HAVE_MMX). Alternatively, we could remove the mmx2
check in configure, since there aren't many systems around that can't
assemble mmx2 instructions.

> +{
>      intptr_t x;
>      if (width & 3) {
>          x = width & ~3;
> @@ -70,12 +71,12 @@ void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t 
> *src, uint16_t *dc, int w
>           "rm"(thresh), "m"(*dithers), "m"(*pw_7f)
>          :"memory"
>      );
> -#endif
>  }
> +#endif
>  
> -void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, 
> int width, int thresh, const uint16_t *dithers)
> -{
>  #if HAVE_SSSE3
> +static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t 
> *dc, int width, int thresh, const uint16_t *dithers)
> +{
>      intptr_t x;
>      if (width & 7) {
>          // could be 10% faster if I somehow eliminated this
> @@ -117,12 +118,12 @@ void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t 
> *src, uint16_t *dc, int
>           "rm"(thresh), "m"(*dithers), "m"(*pw_7f)
>          :"memory"
>      );
> -#endif // HAVE_SSSE3
>  }
> +#endif // HAVE_SSSE3
>  
> -void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, 
> uint8_t *src, int src_linesize, int width)
> -{
>  #if HAVE_SSE
> +static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t 
> *buf1, uint8_t *src, int src_linesize, int width)
> +{
>  #define BLURV(load)\
>      intptr_t x = -2*width;\
>      __asm__ volatile(\
> @@ -160,5 +161,17 @@ void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t 
> *buf, uint16_t *buf1, uint
>      } else {
>          BLURV("movdqa");
>      }
> +}
>  #endif // HAVE_SSE
> +
> +av_cold void ff_gradfun_init_x86(GradFunContext *gf)
> +{
> +    int cpu_flags = av_get_cpu_flags();
> +
> +    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX2)
> +        gf->filter_line = gradfun_filter_line_mmx2;

See above: this checks HAVE_MMX for an mmx2 function; HAVE_MMX2 exists.

> +    if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
> +        gf->filter_line = gradfun_filter_line_ssse3;
> +    if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
> +        gf->blur_line = gradfun_blur_line_sse2;
>  }

ok

Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to