Why are we using x87 asm instead of SSE2 intrinsics for lrint/lrintf?
E.g., why not do something like the following?
diff --git a/mingw-w64-crt/math/lrint.c b/mingw-w64-crt/math/lrint.c
index ec80e4e..7831446 100644
--- a/mingw-w64-crt/math/lrint.c
+++ b/mingw-w64-crt/math/lrint.c
@@ -5,10 +5,16 @@
*/
#include <math.h>
+#if defined(_AMD64_) || defined(__x86_64__)
+#include <xmmintrin.h>
+#endif
+
long lrint (double x)
{
long retval = 0L;
+#if defined(_AMD64_) || defined(__x86_64__)
+ retval = _mm_cvtsd_si32(_mm_load_sd(&x));
-#if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
+#elif defined(_X86_) || defined(__i386__)
__asm__ __volatile__ ("fistpl %0" : "=m" (retval) : "t" (x) : "st");
#elif defined(__arm__) || defined(_ARM_)
float temp;
diff --git a/mingw-w64-crt/math/lrintf.c b/mingw-w64-crt/math/lrintf.c
index 91fc5e1..1e8902f 100644
--- a/mingw-w64-crt/math/lrintf.c
+++ b/mingw-w64-crt/math/lrintf.c
@@ -5,10 +5,16 @@
*/
#include <math.h>
+#if defined(_AMD64_) || defined(__x86_64__)
+#include <xmmintrin.h>
+#endif
+
long lrintf (float x)
{
long retval = 0l;
+#if defined(_AMD64_) || defined(__x86_64__)
+ retval = _mm_cvtss_si32(_mm_load_ss(&x));
-#if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
+#elif defined(_X86_) || defined(__i386__)
__asm__ __volatile__ ("fistpl %0" : "=m" (retval) : "t" (x) : "st");
#elif defined(__arm__) || defined(_ARM_)
__asm__ __volatile__ (
diff --git a/mingw-w64-crt/math/lrintl.c b/mingw-w64-crt/math/lrintl.c
index d710fac..9f1be51 100644
--- a/mingw-w64-crt/math/lrintl.c
+++ b/mingw-w64-crt/math/lrintl.c
@@ -5,10 +5,16 @@
*/
#include <math.h>
+#if defined(_AMD64_) || defined(__x86_64__)
+#include <xmmintrin.h>
+#endif
+
long lrintl (long double x)
{
long retval = 0l;
+#if defined(_AMD64_) || defined(__x86_64__)
+ retval = _mm_cvtsd_si64(_mm_load_sd(&x));
-#if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
+#elif defined(_X86_) || defined(__i386__)
__asm__ __volatile__ ("fistpl %0" : "=m" (retval) : "t" (x) : "st");
#elif defined(__arm__) || defined(_ARM_) || defined(__aarch64__) || defined(_ARM64_)
retval = lrint(x);
--
O.S.
_______________________________________________
Mingw-w64-public mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public