The lrint() and lrintf() functions are pretty slow and make some texture transfers very inefficient. This patch makes a better effort at using the SSE conversion intrinsics for 32-bit gcc and MSVC.
Note, this patch doesn't address the use of SSE4.1 with MSVC. Reviewed-by: José Fonseca <[email protected]> --- src/util/rounding.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/util/rounding.h b/src/util/rounding.h index afb38fb..ab55ebb 100644 --- a/src/util/rounding.h +++ b/src/util/rounding.h @@ -29,9 +29,11 @@ #include <limits.h> #include <stdint.h> -#ifdef __x86_64__ +#if defined(__SSE__) || defined(_MSC_VER) +/* MSVC always has SSE nowadays */ #include <xmmintrin.h> #include <emmintrin.h> +#define ROUND_WITH_SSE 1 #endif #ifdef __SSE4_1__ @@ -95,7 +97,7 @@ _mesa_roundeven(double x) static inline long _mesa_lroundevenf(float x) { -#ifdef __x86_64__ +#ifdef ROUND_WITH_SSE #if LONG_MAX == INT64_MAX return _mm_cvtss_si64(_mm_load_ss(&x)); #elif LONG_MAX == INT32_MAX @@ -115,7 +117,7 @@ _mesa_lroundevenf(float x) static inline long _mesa_lroundeven(double x) { -#ifdef __x86_64__ +#ifdef ROUND_WITH_SSE #if LONG_MAX == INT64_MAX return _mm_cvtsd_si64(_mm_load_sd(&x)); #elif LONG_MAX == INT32_MAX @@ -128,4 +130,7 @@ _mesa_lroundeven(double x) #endif } + +#undef ROUND_WITH_SSE + #endif -- 1.9.1 _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
