Clang

Amy Kwan via Phabricator via cfe-commits Thu, 25 Jun 2020 16:25:24 -0700

amyk created this revision.
amyk added reviewers: power-llvm-team, PowerPC, nemanjai, lei, saghir.
amyk added projects: LLVM, clang, PowerPC.
Herald added subscribers: shchenz, hiraditya.


This patch implements the following function prototypes to utilize the 
`vmulh[s|u][w|d]` and `vdive[s|u][w|d]` instructions:

  vector signed int vec_mulh (vector signed int a, vector signed int b);  
  vector unsigned int vec_mulh (vector unsigned int a, vector unsigned int b); 
  vector signed long long vec_mulh (vector signed long long a, vector signed 
long long b); 
  vector unsigned long long vec_mulh (vector unsigned long long a, vector 
unsigned long long b);
  
  vector signed int vec_dive (vector signed int a, vector signed int b);
  vector unsigned int vec_dive (vector unsigned int a, vector unsigned int b);
  vector signed long long vec_dive (vector signed long long a, vector signed 
long long b);
  vector unsigned long long vec_dive (vector unsigned long long a, vector 
unsigned long long b);

Depends on D82584 <https://reviews.llvm.org/D82584>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82609

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
  llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -84,3 +84,15 @@
 # CHECK-BE: vmulhud 1, 2, 3                       # encoding: [0x10,0x22,0x1a,0xc9]
 # CHECK-LE: vmulhud 1, 2, 3                       # encoding: [0xc9,0x1a,0x22,0x10]
             vmulhud 1, 2, 3
+# CHECK-BE: vdivesw 21, 11, 10                    # encoding: [0x12,0xab,0x53,0x8b]
+# CHECK-LE: vdivesw 21, 11, 10                    # encoding: [0x8b,0x53,0xab,0x12]
+            vdivesw 21, 11, 10
+# CHECK-BE: vdiveuw 21, 11, 10                    # encoding: [0x12,0xab,0x52,0x8b]
+# CHECK-LE: vdiveuw 21, 11, 10                    # encoding: [0x8b,0x52,0xab,0x12]
+            vdiveuw 21, 11, 10
+# CHECK-BE: vdivesd 21, 11, 10                    # encoding: [0x12,0xab,0x53,0xcb]
+# CHECK-LE: vdivesd 21, 11, 10                    # encoding: [0xcb,0x53,0xab,0x12]
+            vdivesd 21, 11, 10
+# CHECK-BE: vdiveud 21, 11, 10                    # encoding: [0x12,0xab,0x52,0xcb]
+# CHECK-LE: vdiveud 21, 11, 10                    # encoding: [0xcb,0x52,0xab,0x12]
+            vdiveud 21, 11, 10
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -81,3 +81,15 @@
 
 # CHECK: vmulhud 1, 2, 3
 0x10 0x22 0x1a 0xc9
+
+# CHECK: vdivesw 21, 11, 10
+0x12 0xab 0x53 0x8b
+
+# CHECK: vdiveuw 21, 11, 10
+0x12 0xab 0x52 0x8b
+
+# CHECK: vdivesd 21, 11, 10
+0x12 0xab 0x53 0xcb
+
+# CHECK: vdiveud 21, 11, 10
+0x12 0xab 0x52 0xcb
Index: llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -72,3 +72,49 @@
   %tr = trunc <4 x i64> %shr to <4 x i32>
   ret <4 x i32> %tr
 }
+
+; Test the vector multiply high intrinsics.
+declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @test_vmulhsw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhsw_intrinsic:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhsw v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %mulh
+}
+
+define <4 x i32> @test_vmulhuw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhuw_intrinsic:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhuw v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %mulh
+}
+
+define <2 x i64> @test_vmulhsd_intrinsic(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhsd_intrinsic:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhsd v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %mulh
+}
+
+define <2 x i64> @test_vmulhud_intrinsic(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhud_intrinsic:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhud v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %mulh
+}
Index: llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
@@ -46,3 +46,48 @@
   %div = sdiv <4 x i32> %a, %b
   ret <4 x i32> %div
 }
+
+declare <4 x i32> @llvm.ppc.altivec.vdivesw(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.ppc.altivec.vdiveuw(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.ppc.altivec.vdivesd(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @test_vdivesw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vdivesw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdivesw v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %div = tail call <4 x i32> @llvm.ppc.altivec.vdivesw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %div
+}
+
+define <4 x i32> @test_vdiveuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vdiveuw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdiveuw v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %div = tail call <4 x i32> @llvm.ppc.altivec.vdiveuw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %div
+}
+
+define <2 x i64> @test_vdivesd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vdivesd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdivesd v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %div = tail call <2 x i64> @llvm.ppc.altivec.vdivesd(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %div
+}
+
+define <2 x i64> @test_vdiveud(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vdiveud:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdiveud v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %div = tail call <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %div
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -607,6 +607,22 @@
    def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                          "vdivud $vD, $vA, $vB", IIC_VecGeneral,
                          [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>;
+   def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                          "vdivesw $vD, $vA, $vB", IIC_VecGeneral,
+                          [(set v4i32:$vD,
+                          (int_ppc_altivec_vdivesw v4i32:$vA, v4i32:$vB))]>;
+   def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                          "vdiveuw $vD, $vA, $vB", IIC_VecGeneral,
+                          [(set v4i32:$vD,
+                          (int_ppc_altivec_vdiveuw v4i32:$vA, v4i32:$vB))]>;
+   def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                          "vdivesd $vD, $vA, $vB", IIC_VecGeneral,
+                          [(set v2i64:$vD,
+                          (int_ppc_altivec_vdivesd v2i64:$vA, v2i64:$vB))]>;
+   def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                          "vdiveud $vD, $vA, $vB", IIC_VecGeneral,
+                          [(set v2i64:$vD,
+                          (int_ppc_altivec_vdiveud v2i64:$vA, v2i64:$vB))]>;
 }
 
 //---------------------------- Anonymous Patterns ----------------------------//
@@ -619,4 +635,14 @@
             (v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>;
   def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
             (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
+
+  // Exploit the vector multiply high instructions using intrinsics.
+  def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)),
+            (v4i32 (VMULHSW $vA, $vB))>;
+  def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)),
+            (v4i32 (VMULHUW $vA, $vB))>;
+  def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)),
+            (v2i64 (VMULHSD $vA, $vB))>;
+  def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)),
+            (v2i64 (VMULHUD $vA, $vB))>;
 }
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -800,6 +800,18 @@
                             [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
                             [IntrNoMem]>;
 
+// Vector Divide Extended Intrinsics.
+def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
+def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
+def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;
+def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;
+
+// Vector Multiply High Intrinsics.
+def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
+def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">;
+def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">;
+def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">;
+
 //===----------------------------------------------------------------------===//
 // PowerPC VSX Intrinsic Definitions.
 
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -12,6 +12,54 @@
 vector unsigned long long vulla, vullb;
 unsigned int uia;
 
+vector signed int test_vec_dive_si(void) {
+  // CHECK: @llvm.ppc.altivec.vdivesw(<4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_dive(vsia, vsib);
+}
+
+vector unsigned int test_vec_dive_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vdiveuw(<4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_dive(vuia, vuib);
+}
+
+vector signed long long test_vec_dive_sll(void) {
+  // CHECK: @llvm.ppc.altivec.vdivesd(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_dive(vslla, vsllb);
+}
+
+vector unsigned long long test_vec_dive_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vdiveud(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_dive(vulla, vullb);
+}
+
+vector signed int test_vec_mulh_si(void) {
+  // CHECK: @llvm.ppc.altivec.vmulhsw(<4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_mulh(vsia, vsib);
+}
+
+vector unsigned int test_vec_mulh_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vmulhuw(<4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_mulh(vuia, vuib);
+}
+
+vector signed long long test_vec_mulh_sll(void) {
+  // CHECK: @llvm.ppc.altivec.vmulhsd(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_mulh(vslla, vsllb);
+}
+
+vector unsigned long long test_vec_mulh_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vmulhud(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_mulh(vulla, vullb);
+}
+
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
   // CHECK-NEXT: ret <2 x i64>
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -3288,6 +3288,30 @@
 }
 #endif
 
+/* vec_dive */
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed int __ATTRS_o_ai
+vec_dive(vector signed int __a, vector signed int __b) {
+  return __builtin_altivec_vdivesw(__a, __b);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_dive(vector unsigned int __a, vector unsigned int __b) {
+  return __builtin_altivec_vdiveuw(__a, __b);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_dive(vector signed long long __a, vector signed long long __b) {
+  return __builtin_altivec_vdivesd(__a, __b);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_dive(vector unsigned long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vdiveud(__a, __b);
+}
+#endif
+
 /* vec_dss */
 
 #define vec_dss __builtin_altivec_dss
@@ -5737,6 +5761,30 @@
 #endif
 }
 
+/* vec_mulh */
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed int __ATTRS_o_ai
+vec_mulh(vector signed int __a, vector signed int __b) {
+  return __builtin_altivec_vmulhsw(__a, __b);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_mulh(vector unsigned int __a, vector unsigned int __b) {
+  return __builtin_altivec_vmulhuw(__a, __b);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_mulh(vector signed long long __a, vector signed long long __b) {
+  return __builtin_altivec_vmulhsd(__a, __b);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_mulh(vector unsigned long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vmulhud(__a, __b);
+}
+#endif
+
 /* vec_mulo */
 
 static __inline__ vector short __ATTRS_o_ai vec_mulo(vector signed char __a,
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -298,6 +298,18 @@
 BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "")
 
+// P10 Vector Divide Extended built-ins.
+BUILTIN(__builtin_altivec_vdivesw, "V4SiV4SiV4Si", "")
+BUILTIN(__builtin_altivec_vdiveuw, "V4UiV4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vdivesd, "V2LLiV2LLiV2LLi", "")
+BUILTIN(__builtin_altivec_vdiveud, "V2ULLiV2ULLiV2ULLi", "")
+
+// P10 Vector Multiply High built-ins.
+BUILTIN(__builtin_altivec_vmulhsw, "V4SiV4SiV4Si", "")
+BUILTIN(__builtin_altivec_vmulhuw, "V4UiV4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vmulhsd, "V2LLiV2LLiV2LLi", "")
+BUILTIN(__builtin_altivec_vmulhud, "V2ULLiV2ULLiV2ULLi", "")
+
 // P10 Vector Parallel Bits built-ins.
 BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D82609: [PowerPC][Power10] Implement Vector Multiply High/Divide Extended Builtins in LLVM/Clang

Reply via email to