On 2012-07-01 13:08:47 +0100, Måns Rullgård wrote:
> Signed-off-by: Mans Rullgard <[email protected]>
> ---
> libavfilter/gradfun.h | 7 ++-----
> libavfilter/vf_gradfun.c | 9 ++-------
> libavfilter/x86/gradfun.c | 29 +++++++++++++++++++++--------
> 3 files changed, 25 insertions(+), 20 deletions(-)
>
> diff --git a/libavfilter/gradfun.h b/libavfilter/gradfun.h
> index 6b192a3..5d01130 100644
> --- a/libavfilter/gradfun.h
> +++ b/libavfilter/gradfun.h
> @@ -37,12 +37,9 @@ typedef struct {
> void (*blur_line) (uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t
> *src, int src_linesize, int width);
> } GradFunContext;
>
> +void ff_gradfun_init_x86(GradFunContext *gf);
> +
> void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int
> width, int thresh, const uint16_t *dithers);
> void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
> uint8_t *src, int src_linesize, int width);
>
> -void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc,
> int width, int thresh, const uint16_t *dithers);
> -void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc,
> int width, int thresh, const uint16_t *dithers);
> -
> -void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
> uint8_t *src, int src_linesize, int width);
> -
> #endif /* AVFILTER_GRADFUN_H */
> diff --git a/libavfilter/vf_gradfun.c b/libavfilter/vf_gradfun.c
> index 12977d9..71749fe 100644
> --- a/libavfilter/vf_gradfun.c
> +++ b/libavfilter/vf_gradfun.c
> @@ -123,7 +123,6 @@ static av_cold int init(AVFilterContext *ctx, const char
> *args)
> GradFunContext *gf = ctx->priv;
> float thresh = 1.2;
> int radius = 16;
> - int cpu_flags = av_get_cpu_flags();
>
> if (args)
> sscanf(args, "%f:%d", &thresh, &radius);
> @@ -135,12 +134,8 @@ static av_cold int init(AVFilterContext *ctx, const char
> *args)
> gf->blur_line = ff_gradfun_blur_line_c;
> gf->filter_line = ff_gradfun_filter_line_c;
>
> - if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX2)
> - gf->filter_line = ff_gradfun_filter_line_mmx2;
> - if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
> - gf->filter_line = ff_gradfun_filter_line_ssse3;
> - if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
> - gf->blur_line = ff_gradfun_blur_line_sse2;
> + if (HAVE_MMX)
> + ff_gradfun_init_x86(gf);
>
> av_log(ctx, AV_LOG_VERBOSE, "threshold:%.2f radius:%d\n", thresh,
> gf->radius);
>
> diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c
> index ff3b19d..79d6617 100644
> --- a/libavfilter/x86/gradfun.c
> +++ b/libavfilter/x86/gradfun.c
> @@ -18,6 +18,7 @@
> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> */
>
> +#include "libavutil/attributes.h"
> #include "libavutil/cpu.h"
> #include "libavutil/x86_cpu.h"
> #include "libavfilter/gradfun.h"
> @@ -25,9 +26,9 @@
> DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] =
> {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
> DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] =
> {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
>
> -void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc,
> int width, int thresh, const uint16_t *dithers)
> -{
> #if HAVE_MMX
> +static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t
> *dc, int width, int thresh, const uint16_t *dithers)
We have HAVE_MMX2, if you would like to fix this inconsistency (the function
is MMX2 code but is guarded by HAVE_MMX). Alternatively, we could remove the
mmx2 check in configure, since there aren't many systems around that can't
assemble mmx2 instructions.
> +{
> intptr_t x;
> if (width & 3) {
> x = width & ~3;
> @@ -70,12 +71,12 @@ void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t
> *src, uint16_t *dc, int w
> "rm"(thresh), "m"(*dithers), "m"(*pw_7f)
> :"memory"
> );
> -#endif
> }
> +#endif
>
> -void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc,
> int width, int thresh, const uint16_t *dithers)
> -{
> #if HAVE_SSSE3
> +static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t
> *dc, int width, int thresh, const uint16_t *dithers)
> +{
> intptr_t x;
> if (width & 7) {
> // could be 10% faster if I somehow eliminated this
> @@ -117,12 +118,12 @@ void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t
> *src, uint16_t *dc, int
> "rm"(thresh), "m"(*dithers), "m"(*pw_7f)
> :"memory"
> );
> -#endif // HAVE_SSSE3
> }
> +#endif // HAVE_SSSE3
>
> -void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
> uint8_t *src, int src_linesize, int width)
> -{
> #if HAVE_SSE
> +static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t
> *buf1, uint8_t *src, int src_linesize, int width)
> +{
> #define BLURV(load)\
> intptr_t x = -2*width;\
> __asm__ volatile(\
> @@ -160,5 +161,17 @@ void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t
> *buf, uint16_t *buf1, uint
> } else {
> BLURV("movdqa");
> }
> +}
> #endif // HAVE_SSE
> +
> +av_cold void ff_gradfun_init_x86(GradFunContext *gf)
> +{
> + int cpu_flags = av_get_cpu_flags();
> +
> + if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX2)
> + gf->filter_line = gradfun_filter_line_mmx2;
See above: HAVE_MMX2 would be the consistent check to pair with
AV_CPU_FLAG_MMX2 here.
> + if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
> + gf->filter_line = gradfun_filter_line_ssse3;
> + if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
> + gf->blur_line = gradfun_blur_line_sse2;
> }
ok
Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel