[PATCH] D106721: [AArch64] Implemnt MSVC __mulh and __umulh builtins and corresponding IR level intrinsics

2021-08-19 Thread Martin Storsjö via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGcc3affd8b020: [clang] [MSVC] Implement __mulh and __umulh 
builtins for aarch64 (authored by mstorsjo).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106721/new/

https://reviews.llvm.org/D106721

Files:
  clang/include/clang/Basic/BuiltinsAArch64.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/intrin.h
  clang/test/CodeGen/arm64-microsoft-intrinsics.c


Index: clang/test/CodeGen/arm64-microsoft-intrinsics.c
===
--- clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -81,6 +81,28 @@
 // CHECK-MSVC: fence syncscope("singlethread")
 // CHECK-LINUX: error: implicit declaration of function '_ReadWriteBarrier'
 
+long long check_mulh(long long a, long long b) {
+  return __mulh(a, b);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]] = sext i64 {{.*}} to i128
+// CHECK-MSVC: %[[ARG2:.*]] = sext i64 {{.*}} to i128
+// CHECK-MSVC: %[[PROD:.*]] = mul nsw i128 %[[ARG1]], %[[ARG2]]
+// CHECK-MSVC: %[[HIGH:.*]] = ashr i128 %[[PROD]], 64
+// CHECK-MSVC: %[[RES:.*]] = trunc i128 %[[HIGH]] to i64
+// CHECK-LINUX: error: implicit declaration of function '__mulh'
+
+unsigned long long check_umulh(unsigned long long a, unsigned long long b) {
+  return __umulh(a, b);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]] = zext i64 {{.*}} to i128
+// CHECK-MSVC: %[[ARG2:.*]] = zext i64 {{.*}} to i128
+// CHECK-MSVC: %[[PROD:.*]] = mul nuw i128 %[[ARG1]], %[[ARG2]]
+// CHECK-MSVC: %[[HIGH:.*]] = lshr i128 %[[PROD]], 64
+// CHECK-MSVC: %[[RES:.*]] = trunc i128 %[[HIGH]] to i64
+// CHECK-LINUX: error: implicit declaration of function '__umulh'
+
 unsigned __int64 check__getReg() {
   unsigned volatile __int64 reg;
   reg = __getReg(18);
Index: clang/lib/Headers/intrin.h
===
--- clang/lib/Headers/intrin.h
+++ clang/lib/Headers/intrin.h
@@ -574,6 +574,9 @@
 unsigned short __cdecl _byteswap_ushort(unsigned short val);
 unsigned long __cdecl _byteswap_ulong (unsigned long val);
 unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val);
+
+__int64 __mulh(__int64 __a, __int64 __b);
+unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b);
 #endif
 
 
/**\
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -9712,6 +9712,29 @@
 return Builder.CreateCall(F);
   }
 
+  if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) {
+llvm::Type *ResType = ConvertType(E->getType());
+llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
+
+bool IsSigned = BuiltinID == AArch64::BI__mulh;
+Value *LHS =
+Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, 
IsSigned);
+Value *RHS =
+Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, 
IsSigned);
+
+Value *MulResult, *HigherBits;
+if (IsSigned) {
+  MulResult = Builder.CreateNSWMul(LHS, RHS);
+  HigherBits = Builder.CreateAShr(MulResult, 64);
+} else {
+  MulResult = Builder.CreateNUWMul(LHS, RHS);
+  HigherBits = Builder.CreateLShr(MulResult, 64);
+}
+HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
+
+return HigherBits;
+  }
+
   // Handle MSVC intrinsics before argument evaluation to prevent double
   // evaluation.
   if (Optional MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))
Index: clang/include/clang/Basic/BuiltinsAArch64.def
===
--- clang/include/clang/Basic/BuiltinsAArch64.def
+++ clang/include/clang/Basic/BuiltinsAArch64.def
@@ -243,6 +243,9 @@
 TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
 
+TARGET_HEADER_BUILTIN(__mulh,  "SLLiSLLiSLLi", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
+
 #undef BUILTIN
 #undef LANGBUILTIN
 #undef TARGET_HEADER_BUILTIN


Index: clang/test/CodeGen/arm64-microsoft-intrinsics.c
===
--- clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -81,6 +81,28 @@
 // CHECK-MSVC: fence syncscope("singlethread")
 // CHECK-LINUX: error: implicit declaration of function '_ReadWriteBarrier'
 
+long long check_mulh(long long a, long long b) {
+  return __mulh(a, b);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]] = sext i64 {{.*}} to i128
+// CHECK-MSVC: %[[ARG2:.*]] = sext i64 

[PATCH] D106721: [AArch64] Implemnt MSVC __mulh and __umulh builtins and corresponding IR level intrinsics

2021-08-18 Thread Reid Kleckner via Phabricator via cfe-commits
rnk accepted this revision.
rnk added a comment.
This revision is now accepted and ready to land.

lgtm


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106721/new/

https://reviews.llvm.org/D106721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106721: [AArch64] Implemnt MSVC __mulh and __umulh builtins and corresponding IR level intrinsics

2021-08-17 Thread Martin Storsjö via Phabricator via cfe-commits
mstorsjo updated this revision to Diff 366839.
mstorsjo added a comment.

Reimplemented the builtins by expanding to the corresponding i128 
multiplication in IR, just like the corresponding existing `__mulh` and 
`__umulh` builtins on x86.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106721/new/

https://reviews.llvm.org/D106721

Files:
  clang/include/clang/Basic/BuiltinsAArch64.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/intrin.h
  clang/test/CodeGen/arm64-microsoft-intrinsics.c


Index: clang/test/CodeGen/arm64-microsoft-intrinsics.c
===
--- clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -81,6 +81,28 @@
 // CHECK-MSVC: fence syncscope("singlethread")
 // CHECK-LINUX: error: implicit declaration of function '_ReadWriteBarrier'
 
+long long check_mulh(long long a, long long b) {
+  return __mulh(a, b);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]] = sext i64 {{.*}} to i128
+// CHECK-MSVC: %[[ARG2:.*]] = sext i64 {{.*}} to i128
+// CHECK-MSVC: %[[PROD:.*]] = mul nsw i128 %[[ARG1]], %[[ARG2]]
+// CHECK-MSVC: %[[HIGH:.*]] = ashr i128 %[[PROD]], 64
+// CHECK-MSVC: %[[RES:.*]] = trunc i128 %[[HIGH]] to i64
+// CHECK-LINUX: error: implicit declaration of function '__mulh'
+
+unsigned long long check_umulh(unsigned long long a, unsigned long long b) {
+  return __umulh(a, b);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]] = zext i64 {{.*}} to i128
+// CHECK-MSVC: %[[ARG2:.*]] = zext i64 {{.*}} to i128
+// CHECK-MSVC: %[[PROD:.*]] = mul nuw i128 %[[ARG1]], %[[ARG2]]
+// CHECK-MSVC: %[[HIGH:.*]] = lshr i128 %[[PROD]], 64
+// CHECK-MSVC: %[[RES:.*]] = trunc i128 %[[HIGH]] to i64
+// CHECK-LINUX: error: implicit declaration of function '__umulh'
+
 unsigned __int64 check__getReg() {
   unsigned volatile __int64 reg;
   reg = __getReg(18);
Index: clang/lib/Headers/intrin.h
===
--- clang/lib/Headers/intrin.h
+++ clang/lib/Headers/intrin.h
@@ -574,6 +574,9 @@
 unsigned short __cdecl _byteswap_ushort(unsigned short val);
 unsigned long __cdecl _byteswap_ulong (unsigned long val);
 unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val);
+
+__int64 __mulh(__int64 __a, __int64 __b);
+unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b);
 #endif
 
 
/**\
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -9712,6 +9712,29 @@
 return Builder.CreateCall(F);
   }
 
+  if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) {
+llvm::Type *ResType = ConvertType(E->getType());
+llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
+
+bool IsSigned = BuiltinID == AArch64::BI__mulh;
+Value *LHS =
+Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, 
IsSigned);
+Value *RHS =
+Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, 
IsSigned);
+
+Value *MulResult, *HigherBits;
+if (IsSigned) {
+  MulResult = Builder.CreateNSWMul(LHS, RHS);
+  HigherBits = Builder.CreateAShr(MulResult, 64);
+} else {
+  MulResult = Builder.CreateNUWMul(LHS, RHS);
+  HigherBits = Builder.CreateLShr(MulResult, 64);
+}
+HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
+
+return HigherBits;
+  }
+
   // Handle MSVC intrinsics before argument evaluation to prevent double
   // evaluation.
   if (Optional MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))
Index: clang/include/clang/Basic/BuiltinsAArch64.def
===
--- clang/include/clang/Basic/BuiltinsAArch64.def
+++ clang/include/clang/Basic/BuiltinsAArch64.def
@@ -243,6 +243,9 @@
 TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
 
+TARGET_HEADER_BUILTIN(__mulh,  "SLLiSLLiSLLi", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
+
 #undef BUILTIN
 #undef LANGBUILTIN
 #undef TARGET_HEADER_BUILTIN


Index: clang/test/CodeGen/arm64-microsoft-intrinsics.c
===
--- clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -81,6 +81,28 @@
 // CHECK-MSVC: fence syncscope("singlethread")
 // CHECK-LINUX: error: implicit declaration of function '_ReadWriteBarrier'
 
+long long check_mulh(long long a, long long b) {
+  return __mulh(a, b);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]] = sext i64 {{.*}} to i128
+// CHECK-MSVC: %[[ARG2:.*]] = sext i64 {{.*}} to 

[PATCH] D106721: [AArch64] Implemnt MSVC __mulh and __umulh builtins and corresponding IR level intrinsics

2021-07-26 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added a comment.

In D106721#2902586 , @efriedma wrote:

> We won't call compiler-rt for i128 multiply on aarch64.  It's not worthwhile 
> under any circumstances.
>
> Pattern-matching 64/64->128 multiply in SelectionDAG legalization has been 
> reliable in practice, as far as I know.  And even it misses due to weird 
> behavior in instcombine or something like that, we only end up with one or 
> two extra mla instructions, so not a big deal.

Sounds reasonable to use i128 to me then.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106721/new/

https://reviews.llvm.org/D106721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106721: [AArch64] Implemnt MSVC __mulh and __umulh builtins and corresponding IR level intrinsics

2021-07-24 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added a comment.

We won't call compiler-rt for i128 multiply on aarch64.  It's not worthwhile 
under any circumstances.

Pattern-matching 64/64->128 multiply in SelectionDAG legalization has been 
reliable in practice, as far as I know.  And even it misses due to weird 
behavior in instcombine or something like that, we only end up with one or two 
extra mla instructions, so not a big deal.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106721/new/

https://reviews.llvm.org/D106721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106721: [AArch64] Implemnt MSVC __mulh and __umulh builtins and corresponding IR level intrinsics

2021-07-23 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added a comment.

In D106721#2901728 , @efriedma wrote:

> Do we need LLVM intrinsics for these?  For the x86 equivalents, we just 
> generate `mul i128`.

I worry that LLVM will end up generating a call to compiler-rt to implement 
i128 arithmetic, especially in unoptimized builds, and currently nothing 
autolinks `clang_rt.builtins-${arch}.lib` in an MSVC environment. We can do 
that, I filed an issue for it, it just needs to get done.

I also worry that without the intrinsic, LLVM will more often than not fail to 
match the pattern here. The issues users filed about x86 rotate instructions 
come to mind, LLVM failed to produce the desired `rot` instructions sometimes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106721/new/

https://reviews.llvm.org/D106721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106721: [AArch64] Implemnt MSVC __mulh and __umulh builtins and corresponding IR level intrinsics

2021-07-23 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added a comment.

Do we need LLVM intrinsics for these?  For the x86 equivalents, we just 
generate `mul i128`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106721/new/

https://reviews.llvm.org/D106721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106721: [AArch64] Implemnt MSVC __mulh and __umulh builtins and corresponding IR level intrinsics

2021-07-23 Thread Martin Storsjö via Phabricator via cfe-commits
mstorsjo created this revision.
mstorsjo added reviewers: rnk, STL_MSFT, efriedma, DavidSpickett.
Herald added subscribers: danielkiss, hiraditya, kristof.beyls.
mstorsjo requested review of this revision.
Herald added projects: clang, LLVM.

This seems to work, but would it need more testing for anything else
than just the most trivial happy path cases, and any other tests than
these? (I tried to look for some of the existing tests for MSVC ARM64
intrinsics, but they're all very sparingly tested.)

This should fix PR51128.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106721

Files:
  clang/include/clang/Basic/BuiltinsAArch64.def
  clang/lib/Headers/intrin.h
  clang/test/CodeGen/arm64-microsoft-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/intrinsics-mulh.ll

Index: llvm/test/CodeGen/AArch64/intrinsics-mulh.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/intrinsics-mulh.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=aarch64-eabi -O=3 | FileCheck %s
+
+
+define i64 @check_smulh(i64 %a, i64 %b) {
+entry:
+  ; CHECK: smulh x0, x0, x1
+  %0 = tail call i64 @llvm.aarch64.smulh(i64 %a, i64 %b)
+  ret i64 %0
+} 
+
+define i64 @check_umulh(i64 %a, i64 %b) {
+entry:
+  ; CHECK: umulh x0, x0, x1
+  %0 = tail call i64 @llvm.aarch64.umulh(i64 %a, i64 %b)
+  ret i64 %0
+} 
+
+
+declare i64 @llvm.aarch64.smulh(i64, i64)
+declare i64 @llvm.aarch64.umulh(i64, i64)
+
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1709,6 +1709,8 @@
 // Multiply-high
 def SMULHrr : MulHi<0b010, "smulh", mulhs>;
 def UMULHrr : MulHi<0b110, "umulh", mulhu>;
+def : Pat<(int_aarch64_smulh GPR64:$Rn, GPR64:$Rm), (SMULHrr GPR64:$Rn, GPR64:$Rm)>;
+def : Pat<(int_aarch64_umulh GPR64:$Rn, GPR64:$Rm), (UMULHrr GPR64:$Rn, GPR64:$Rm)>;
 
 // CRC32
 def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -34,6 +34,13 @@
 
 def int_aarch64_clrex : Intrinsic<[]>;
 
+def int_aarch64_smulh : MSBuiltin<"__mulh">,
+  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+[IntrNoMem, IntrWillReturn]>;
+def int_aarch64_umulh : MSBuiltin<"__umulh">,
+  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+[IntrNoMem, IntrWillReturn]>;
+
 def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
 LLVMMatchType<0>], [IntrNoMem]>;
 def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
Index: clang/test/CodeGen/arm64-microsoft-intrinsics.c
===
--- clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -81,6 +81,20 @@
 // CHECK-MSVC: fence syncscope("singlethread")
 // CHECK-LINUX: error: implicit declaration of function '_ReadWriteBarrier'
 
+long long check_mulh(long long a, long long b) {
+  return __mulh(a, b);
+}
+
+// CHECK-MSVC: call i64 @llvm.aarch64.smulh(i64 %[[PARAM1:.*]], i64 %[[PARAM2:.*]])
+// CHECK-LINUX: error: implicit declaration of function '__mulh'
+
+unsigned long long check_umulh(unsigned long long a, unsigned long long b) {
+  return __umulh(a, b);
+}
+
+// CHECK-MSVC: call i64 @llvm.aarch64.umulh(i64 %[[PARAM3:.*]], i64 %[[PARAM4:.*]])
+// CHECK-LINUX: error: implicit declaration of function '__umulh'
+
 unsigned __int64 check__getReg() {
   unsigned volatile __int64 reg;
   reg = __getReg(18);
Index: clang/lib/Headers/intrin.h
===
--- clang/lib/Headers/intrin.h
+++ clang/lib/Headers/intrin.h
@@ -574,6 +574,9 @@
 unsigned short __cdecl _byteswap_ushort(unsigned short val);
 unsigned long __cdecl _byteswap_ulong (unsigned long val);
 unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val);
+
+__int64 __mulh(__int64 __a, __int64 __b);
+unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b);
 #endif
 
 /**\
Index: clang/include/clang/Basic/BuiltinsAArch64.def
===
--- clang/include/clang/Basic/BuiltinsAArch64.def
+++ clang/include/clang/Basic/BuiltinsAArch64.def
@@ -243,6 +243,9 @@
 TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")