vlc | branch: master | Rémi Denis-Courmont <[email protected]> | Sat Aug 4 16:45:10 2012 +0300| [0ed12fd66fe7dbd390ed90f33fd137ba0f51908d] | committer: Rémi Denis-Courmont
Check for SSE4 at build time where possible > http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=0ed12fd66fe7dbd390ed90f33fd137ba0f51908d --- include/vlc_cpu.h | 24 +++++++++++++++++++++--- modules/codec/avcodec/avcodec.c | 7 +++---- modules/codec/avcodec/copy.c | 7 ++++++- modules/codec/avcodec/encoder.c | 7 +++---- modules/stream_out/switcher.c | 8 ++++---- src/misc/cpu.c | 18 +++++++----------- src/posix/linux_cpu.c | 16 +++------------- 7 files changed, 47 insertions(+), 40 deletions(-) diff --git a/include/vlc_cpu.h b/include/vlc_cpu.h index 9753687..a87589d 100644 --- a/include/vlc_cpu.h +++ b/include/vlc_cpu.h @@ -37,9 +37,9 @@ VLC_API unsigned vlc_CPU(void); # define VLC_CPU_SSE2 128 # define VLC_CPU_SSE3 256 # define VLC_CPU_SSSE3 512 -# define CPU_CAPABILITY_SSE4_1 (1<<10) -# define CPU_CAPABILITY_SSE4_2 (1<<11) -# define CPU_CAPABILITY_SSE4A (1<<12) +# define VLC_CPU_SSE4_1 1024 +# define VLC_CPU_SSE4_2 2048 +# define VLC_CPU_SSE4A 4096 # if defined (__MMX__) # define vlc_CPU_MMX() (1) @@ -85,6 +85,24 @@ VLC_API unsigned vlc_CPU(void); # define vlc_CPU_SSSE3() ((vlc_CPU() & VLC_CPU_SSSE3) != 0) # endif +# ifdef __SSE4_1__ +# define vlc_CPU_SSE4_1() (1) +# else +# define vlc_CPU_SSE4_1() ((vlc_CPU() & VLC_CPU_SSE4_1) != 0) +# endif + +# ifdef __SSE4_2__ +# define vlc_CPU_SSE4_2() (1) +# else +# define vlc_CPU_SSE4_2() ((vlc_CPU() & VLC_CPU_SSE4_2) != 0) +# endif + +# ifdef __SSE4A__ +# define vlc_CPU_SSE4A() (1) +# else +# define vlc_CPU_SSE4A() ((vlc_CPU() & VLC_CPU_SSE4A) != 0) +# endif + # elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__) # define HAVE_FPU 1 # define VLC_CPU_ALTIVEC 2 diff --git a/modules/codec/avcodec/avcodec.c b/modules/codec/avcodec/avcodec.c index a4e3d82..4159cc0 100644 --- a/modules/codec/avcodec/avcodec.c +++ b/modules/codec/avcodec/avcodec.c @@ -331,12 +331,11 @@ static int OpenDecoder( vlc_object_t *p_this ) /* Set CPU capabilities */ p_context->dsp_mask = 0; #if defined (__i386__) || defined (__x86_64__) - unsigned i_cpu = vlc_CPU(); if( !vlc_CPU_MMX() ) p_context->dsp_mask |= AV_CPU_FLAG_MMX; if( !vlc_CPU_MMXEXT() ) p_context->dsp_mask |= AV_CPU_FLAG_MMX2; - if( !(i_cpu & CPU_CAPABILITY_3DNOW) ) + if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) ) p_context->dsp_mask |= AV_CPU_FLAG_3DNOW; if( !vlc_CPU_SSE() ) p_context->dsp_mask |= AV_CPU_FLAG_SSE; @@ -351,11 +350,11 @@ static int OpenDecoder( vlc_object_t *p_this ) p_context->dsp_mask |= AV_CPU_FLAG_SSSE3; # endif # ifdef AV_CPU_FLAG_SSE4 - if( !(i_cpu & CPU_CAPABILITY_SSE4_1) ) + if( !vlc_CPU_SSE4_1() ) p_context->dsp_mask |= AV_CPU_FLAG_SSE4; # endif # ifdef AV_CPU_FLAG_SSE42 - if( !(i_cpu & CPU_CAPABILITY_SSE4_2) ) + if( !vlc_CPU_SSE4_2() ) p_context->dsp_mask |= AV_CPU_FLAG_SSE42; # endif #endif diff --git a/modules/codec/avcodec/copy.c b/modules/codec/avcodec/copy.c index 71758ef..2828923 100644 --- a/modules/codec/avcodec/copy.c +++ b/modules/codec/avcodec/copy.c @@ -47,6 +47,11 @@ store " %%xmm4, 48(%[dst])\n" \ : : [dst]"r"(dstp), [src]"r"(srcp) : "memory") +#ifndef __SSE4A__ +# undef vlc_CPU_SSE4A +# define vlc_CPU_SSE4A() ((cpu & VLC_CPU_SSE4A) != 0) +#endif + #ifndef __SSSE3__ # undef vlc_CPU_SSSE3 # define vlc_CPU_SSSE3() ((cpu & VLC_CPU_SSSE3) != 0) @@ -88,7 +93,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch, dst[x] = src[x]; #ifdef CAN_COMPILE_SSE4_1 - if (cpu & CPU_CAPABILITY_SSE4_1) { + if (vlc_CPU_SSE4_1()) { if (!unaligned) { for (; x+63 < width; x += 64) COPY64(&dst[x], &src[x], "movntdqa", "movdqa"); diff --git a/modules/codec/avcodec/encoder.c b/modules/codec/avcodec/encoder.c index 7101df1..02888ef 100644 --- a/modules/codec/avcodec/encoder.c +++ b/modules/codec/avcodec/encoder.c @@ -325,12 +325,11 @@ int OpenEncoder( vlc_object_t *p_this ) /* Set CPU capabilities */ p_context->dsp_mask = 0; #if defined (__i386__) || defined (__x86_64__) - unsigned i_cpu = vlc_CPU(); if( !vlc_CPU_MMX() ) p_context->dsp_mask |= AV_CPU_FLAG_MMX; if( !vlc_CPU_MMXEXT() ) p_context->dsp_mask |= AV_CPU_FLAG_MMX2; - if( !(i_cpu & CPU_CAPABILITY_3DNOW) ) + if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) ) p_context->dsp_mask |= AV_CPU_FLAG_3DNOW; if( !vlc_CPU_SSE() ) p_context->dsp_mask |= AV_CPU_FLAG_SSE; @@ -345,11 +344,11 @@ int OpenEncoder( vlc_object_t *p_this ) p_context->dsp_mask |= AV_CPU_FLAG_SSSE3; # endif # ifdef AV_CPU_FLAG_SSE4 - if( !(i_cpu & CPU_CAPABILITY_SSE4_1) ) + if( !vlc_CPU_SSE4_1() ) p_context->dsp_mask |= AV_CPU_FLAG_SSE4; # endif # ifdef AV_CPU_FLAG_SSE42 - if( !(i_cpu & CPU_CAPABILITY_SSE4_2) ) + if( !vlc_CPU_SSE4_2() ) p_context->dsp_mask |= AV_CPU_FLAG_SSE42; # endif #endif diff --git a/modules/stream_out/switcher.c b/modules/stream_out/switcher.c index b6ebf52..bb0c1b3 100644 --- a/modules/stream_out/switcher.c +++ b/modules/stream_out/switcher.c @@ -400,11 +400,11 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3; # endif # ifdef AV_CPU_FLAG_SSE4 - if( !(i_cpu & CPU_CAPABILITY_SSE4_1) ) + if( !vlc_CPU_SSE4_1() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4; # endif # ifdef AV_CPU_FLAG_SSE42 - if( !(i_cpu & CPU_CAPABILITY_SSE4_2) ) + if( !vlc_CPU_SSE4_2() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42; # endif #endif @@ -821,11 +821,11 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3; # endif # ifdef AV_CPU_FLAG_SSE4 - if( !(i_cpu & CPU_CAPABILITY_SSE4_1) ) + if( !vlc_CPU_SSE4_1() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4; # endif # ifdef AV_CPU_FLAG_SSE42 - if( !(i_cpu & CPU_CAPABILITY_SSE4_2) ) + if( !vlc_CPU_SSE4_2() ) id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42; # endif #endif diff --git a/src/misc/cpu.c b/src/misc/cpu.c index e0ae7b9..f7e9bfe 100644 --- a/src/misc/cpu.c +++ b/src/misc/cpu.c @@ -247,18 +247,14 @@ void vlc_CPU_init (void) i_capabilities |= VLC_CPU_SSSE3; # endif -# if defined (__SSE4_1__) - i_capabilities |= CPU_CAPABILITY_SSE4_1; -# elif defined (CAN_COMPILE_SSE4_1) +# if defined (CAN_COMPILE_SSE4_1) if ((i_ecx & 0x00080000) && vlc_CPU_check ("SSE4.1", SSE4_1_test)) - i_capabilities |= CPU_CAPABILITY_SSE4_1; + i_capabilities |= VLC_CPU_SSE4_1; # endif -# if defined (__SSE4_2__) - i_capabilities |= CPU_CAPABILITY_SSE4_2; -# elif defined (CAN_COMPILE_SSE4_2) +# if defined (CAN_COMPILE_SSE4_2) if ((i_ecx & 0x00100000) && vlc_CPU_check ("SSE4.2", SSE4_2_test)) - i_capabilities |= CPU_CAPABILITY_SSE4_2; + i_capabilities |= VLC_CPU_SSE4_2; # endif /* test for additional capabilities */ @@ -345,9 +341,9 @@ void vlc_CPU_dump (vlc_object_t *obj) if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");; if (vlc_CPU_SSE3()) p += sprintf (p, "SSE2 ");; if (vlc_CPU_SSSE3()) p += sprintf (p, "SSSE3 ");; - PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1"); - PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2"); - PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A, "SSE4A"); + if (vlc_CPU_SSE4_1()) p += sprintf (p, "SSE4.1 ");; + if (vlc_CPU_SSE4_2()) p += sprintf (p, "SSE4.2 ");; + if (vlc_CPU_SSE4A()) p += sprintf (p, "SSE4A ");; PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!"); #elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__) diff --git a/src/posix/linux_cpu.c b/src/posix/linux_cpu.c index f239d2c..edb4f24 100644 --- a/src/posix/linux_cpu.c +++ b/src/posix/linux_cpu.c @@ -79,16 +79,12 @@ static void vlc_CPU_init (void) core_caps |= VLC_CPU_SSE3; if (!strcmp (cap, "ssse3")) core_caps |= VLC_CPU_SSSE3; -# ifndef __SSE4_1__ if (!strcmp (cap, "sse4_1")) - core_caps |= CPU_CAPABILITY_SSE4_1; -# endif -# ifndef __SSE4_2__ + core_caps |= VLC_CPU_SSE4_1; if (!strcmp (cap, "sse4_2")) - core_caps |= CPU_CAPABILITY_SSE4_1; -# endif + core_caps |= VLC_CPU_SSE4_1; if (!strcmp (cap, "sse4a")) - core_caps |= CPU_CAPABILITY_SSE4A; + core_caps |= VLC_CPU_SSE4A; # ifndef __3dNOW__ if (!strcmp (cap, "3dnow")) core_caps |= CPU_CAPABILITY_3DNOW; @@ -111,12 +107,6 @@ static void vlc_CPU_init (void) /* Always enable capabilities that were forced during compilation */ #if defined (__i386__) || defined (__x86_64__) -# ifdef __SSE4_1__ - all_caps |= CPU_CAPABILITY_SSE4_1; -# endif -# ifdef __SSE4_2__ - all_caps |= CPU_CAPABILITY_SSE4_2; -# endif # ifdef __3dNOW__ all_caps |= CPU_CAPABILITY_3DNOW; # endif _______________________________________________ vlc-commits mailing list [email protected] http://mailman.videolan.org/listinfo/vlc-commits
