Hi

The attached patch adds the MULX intrinsics to the BMI2 header. It tries
to stay in sync with GCC 4.7 and ICC (with BMI2 support), even though these
intrinsics are not required by the BMI2 reference manual [1]. The MULX
intrinsics simply give developers a convenient form of multiplication with
a 128-bit (or, for the 32-bit variant, 64-bit) product, followed by
splitting the product into its high and low halves. The test case is
revised as well. The BMI2 code generation patch [2] is under review.

Thanks for your review.

Yours
- Michael

--------------
[1] http://software.intel.com/sites/default/files/319433-014.pdf
[2] http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120917/151168.html

>From 3468a4316a841d6713a483064f363774e0e186e3 Mon Sep 17 00:00:00 2001
From: Michael Liao <[email protected]>
Date: Mon, 23 Jul 2012 16:37:25 -0700
Subject: [PATCH] Add intrinsic header for MULX

---
 lib/Headers/bmi2intrin.h     |   19 +++++++++++++++++++
 test/CodeGen/bmi2-builtins.c |   17 +++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/lib/Headers/bmi2intrin.h b/lib/Headers/bmi2intrin.h
index c60b0c4..a05cfad 100644
--- a/lib/Headers/bmi2intrin.h
+++ b/lib/Headers/bmi2intrin.h
@@ -70,6 +70,25 @@ _pext_u64(unsigned long long __X, unsigned long long __Y)
   return __builtin_ia32_pext_di(__X, __Y);
 }
 
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+_mulx_u64 (unsigned long long __X, unsigned long long __Y,
+	   unsigned long long *__P)
+{
+  unsigned __int128 __res = (unsigned __int128) __X * __Y;
+  *__P = (unsigned long long) (__res >> 64);
+  return (unsigned long long) __res;
+}
+
+#else /* !__x86_64__ */
+
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
+{
+  unsigned long long __res = (unsigned long long) __X * __Y;
+  *__P = (unsigned int) (__res >> 32);
+  return (unsigned int) __res;
+}
+
 #endif /* !__x86_64__  */
 
 #endif /* __BMI2INTRIN_H */
diff --git a/test/CodeGen/bmi2-builtins.c b/test/CodeGen/bmi2-builtins.c
index 18b2319..201cac6 100644
--- a/test/CodeGen/bmi2-builtins.c
+++ b/test/CodeGen/bmi2-builtins.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O3 -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s --check-prefix=B32
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
@@ -20,6 +21,15 @@ unsigned int test_pext_u32(unsigned int __X, unsigned int __Y) {
   return _pext_u32(__X, __Y);
 }
 
+unsigned int test_mulx_u32(unsigned int __X, unsigned int __Y,
+                                 unsigned int *__P) {
+  // CHECK: @test_mulx_u32
+  // CHECK-NOT: mul i64
+  // B32: @test_mulx_u32
+  // B32: mul i64
+  return _mulx_u32(__X, __Y, __P);
+}
+
 unsigned long long test_bzhi_u64(unsigned long long __X, unsigned long long __Y) {
   // CHECK: @llvm.x86.bmi.bzhi.64
   return _bzhi_u64(__X, __Y);
@@ -34,3 +44,10 @@ unsigned long long test_pext_u64(unsigned long long __X, unsigned long long __Y)
   // CHECK: @llvm.x86.bmi.pext.64
   return _pext_u64(__X, __Y);
 }
+
+unsigned long long test_mulx_u64(unsigned long long __X, unsigned long long __Y,
+                                 unsigned long long *__P) {
+  // CHECK: @test_mulx_u64
+  // CHECK: mul i128
+  return _mulx_u64(__X, __Y, __P);
+}
-- 
1.7.9.5

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to