[PATCH] D79167: [SVE][CodeGen] Legalise scalable vector types for vsetcc & vselect

Kerry McLaughlin via Phabricator via cfe-commits Thu, 30 Apr 2020 06:13:31 -0700

kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, efriedma, dancgr.
Herald added subscribers: psnobl, rkruppe, hiraditya, tschuett.
Herald added a project: LLVM.
kmclaughlin added a parent revision: D79087: [SVE][Codegen] Lower legal min & max operations.


The visitSelect function in SelectionDAGBuilder calls
getTypeConversion to get the type of the operation
after it has been legalised.

This patch changes getTypeConversion to use
ElementCount where necessary to ensure that the
Scalable flag is not dropped.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D79167

Files:
  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
  llvm/lib/CodeGen/TargetLoweringBase.cpp
  llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll

Index: llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
===================================================================
--- llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
+++ llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
@@ -150,6 +150,88 @@
   ret <vscale x 2 x i64> %min
 }
 
+define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c) {
+; CHECK-LABEL: @smin_split_i8
+; CHECK-DAG: ptrue p0.b
+; CHECK-DAG: smin z0.b, p0/m, z0.b, z2.b
+; CHECK-DAG: smin z1.b, p0/m, z1.b, z3.b
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 32 x i8> %a, %b
+  %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i8> %a, <vscale x 32 x i8> %b
+  ret <vscale x 32 x i8> %min
+}
+
+define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c) {
+; CHECK-LABEL: smin_split_i16:
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: smin z0.h, p0/m, z0.h, z4.h
+; CHECK-DAG: smin z1.h, p0/m, z1.h, z5.h
+; CHECK-DAG: smin z2.h, p0/m, z2.h, z6.h
+; CHECK-DAG: smin z3.h, p0/m, z3.h, z7.h
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 32 x i16> %a, %b
+  %min = select <vscale x 32 x i1> %cmp, <vscale x 32 x i16> %a, <vscale x 32 x i16> %b
+  ret <vscale x 32 x i16> %min
+}
+
+define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+; CHECK-LABEL: smin_split_i32:
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: smin z0.s, p0/m, z0.s, z2.s
+; CHECK-DAG: smin z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 8 x i32> %a, %b
+  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b
+  ret <vscale x 8 x i32> %min
+}
+
+define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+; CHECK-LABEL: smin_split_i64:
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: smin z0.d, p0/m, z0.d, z2.d
+; CHECK-DAG: smin z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 4 x i64> %a, %b
+  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
+  ret <vscale x 4 x i64> %min
+}
+
+define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+; CHECK-LABEL: @smin_promote_i8
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: sxtb z1.h, p0/m, z1.h
+; CHECK-DAG: sxtb z0.h, p0/m, z0.h
+; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 8 x i8> %a, %b
+  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b
+  ret <vscale x 8 x i8> %min
+}
+
+define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+; CHECK-LABEL: @smin_promote_i16
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: sxth z1.s, p0/m, z1.s
+; CHECK-DAG: sxth z0.s, p0/m, z0.s
+; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 4 x i16> %a, %b
+  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b
+  ret <vscale x 4 x i16> %min
+}
+
+define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+; CHECK-LABEL: @smin_promote_i32
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: sxtw z1.d, p0/m, z1.d
+; CHECK-DAG: sxtw z0.d, p0/m, z0.d
+; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %cmp = icmp slt <vscale x 2 x i32> %a, %b
+  %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b
+  ret <vscale x 2 x i32> %min
+}
+
 ;
 ; UMIN
 ;
@@ -194,6 +276,27 @@
   ret <vscale x 2 x i64> %min
 }
 
+define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+; CHECK-LABEL: umin_split_i64:
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: umin z0.d, p0/m, z0.d, z2.d
+; CHECK-DAG: umin z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: ret
+  %cmp = icmp ult <vscale x 4 x i64> %a, %b
+  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
+  ret <vscale x 4 x i64> %min
+}
+
+define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+; CHECK-LABEL: @umin_promote_i8
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %cmp = icmp ult <vscale x 8 x i8> %a, %b
+  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b
+  ret <vscale x 8 x i8> %min
+}
+
 ;
 ; SMAX
 ;
@@ -204,8 +307,8 @@
 ; CHECK-DAG: smax z0.b, p0/m, z0.b, z1.b
 ; CHECK-NEXT: ret
   %cmp = icmp sgt <vscale x 16 x i8> %a, %b
-  %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
-  ret <vscale x 16 x i8> %min
+  %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
+  ret <vscale x 16 x i8> %max
 }
 
 define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
@@ -214,8 +317,8 @@
 ; CHECK-DAG: smax z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: ret
   %cmp = icmp sgt <vscale x 8 x i16> %a, %b
-  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
-  ret <vscale x 8 x i16> %min
+  %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
+  ret <vscale x 8 x i16> %max
 }
 
 define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
@@ -224,8 +327,8 @@
 ; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: ret
   %cmp = icmp sgt <vscale x 4 x i32> %a, %b
-  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
-  ret <vscale x 4 x i32> %min
+  %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
+  ret <vscale x 4 x i32> %max
 }
 
 define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
@@ -234,8 +337,29 @@
 ; CHECK-DAG: smax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: ret
   %cmp = icmp sgt <vscale x 2 x i64> %a, %b
-  %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
-  ret <vscale x 2 x i64> %min
+  %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
+  ret <vscale x 2 x i64> %max
+}
+
+define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+; CHECK-LABEL: smax_split_i32:
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: smax z0.s, p0/m, z0.s, z2.s
+; CHECK-DAG: smax z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %cmp = icmp sgt <vscale x 8 x i32> %a, %b
+  %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b
+  ret <vscale x 8 x i32> %max
+}
+
+define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+; CHECK-LABEL: @smax_promote_i16
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %cmp = icmp sgt <vscale x 4 x i16> %a, %b
+  %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b
+  ret <vscale x 4 x i16> %max
 }
 
 ;
@@ -248,8 +372,8 @@
 ; CHECK-DAG: umax z0.b, p0/m, z0.b, z1.b
 ; CHECK-NEXT: ret
   %cmp = icmp ugt <vscale x 16 x i8> %a, %b
-  %min = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
-  ret <vscale x 16 x i8> %min
+  %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
+  ret <vscale x 16 x i8> %max
 }
 
 define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
@@ -258,8 +382,8 @@
 ; CHECK-DAG: umax z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: ret
   %cmp = icmp ugt <vscale x 8 x i16> %a, %b
-  %min = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
-  ret <vscale x 8 x i16> %min
+  %max = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
+  ret <vscale x 8 x i16> %max
 }
 
 define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
@@ -268,8 +392,8 @@
 ; CHECK-DAG: umax z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: ret
   %cmp = icmp ugt <vscale x 4 x i32> %a, %b
-  %min = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
-  ret <vscale x 4 x i32> %min
+  %max = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
+  ret <vscale x 4 x i32> %max
 }
 
 define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
@@ -278,6 +402,27 @@
 ; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: ret
   %cmp = icmp ugt <vscale x 2 x i64> %a, %b
-  %min = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
-  ret <vscale x 2 x i64> %min
+  %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
+  ret <vscale x 2 x i64> %max
+}
+
+define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c) {
+; CHECK-LABEL: umax_split_i16:
+; CHECK-DAG: ptrue p0.h
+; CHECK-DAG: umax z0.h, p0/m, z0.h, z2.h
+; CHECK-DAG: umax z1.h, p0/m, z1.h, z3.h
+; CHECK-NEXT: ret
+  %cmp = icmp ugt <vscale x 16 x i16> %a, %b
+  %max = select <vscale x 16 x i1> %cmp, <vscale x 16 x i16> %a, <vscale x 16 x i16> %b
+  ret <vscale x 16 x i16> %max
+}
+
+define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+; CHECK-LABEL: @umax_promote_i32
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %cmp = icmp ugt <vscale x 2 x i32> %a, %b
+  %max = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b
+  ret <vscale x 2 x i32> %max
 }
Index: llvm/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -820,9 +820,10 @@
            "Promote may not follow Expand or Promote");
 
     if (LA == TypeSplitVector)
-      return LegalizeKind(LA,
-                          EVT::getVectorVT(Context, SVT.getVectorElementType(),
-                                           SVT.getVectorNumElements() / 2));
+      return LegalizeKind(LA, EVT::getVectorVT(Context,
+                                               SVT.getVectorElementType(),
+                                               SVT.getVectorElementCount()/2));
+
     if (LA == TypeScalarizeVector)
       return LegalizeKind(LA, SVT.getVectorElementType());
     return LegalizeKind(LA, NVT);
@@ -849,11 +850,11 @@
   }
 
   // Handle vector types.
-  unsigned NumElts = VT.getVectorNumElements();
   EVT EltVT = VT.getVectorElementType();
+  auto EltCnt = VT.getVectorElementCount();
 
   // Vectors with only one element are always scalarized.
-  if (NumElts == 1)
+  if ((EltCnt.Min == 1) && !EltCnt.Scalable)
     return LegalizeKind(TypeScalarizeVector, EltVT);
 
   // Try to widen vector elements until the element type is a power of two and
@@ -863,8 +864,8 @@
     // Vectors with a number of elements that is not a power of two are always
     // widened, for example <3 x i8> -> <4 x i8>.
     if (!VT.isPow2VectorType()) {
-      NumElts = (unsigned)NextPowerOf2(NumElts);
-      EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
+      EltCnt.Min = (unsigned)NextPowerOf2(EltCnt.Min);
+      EVT NVT = EVT::getVectorVT(Context, EltVT, EltCnt.Min);
       return LegalizeKind(TypeWidenVector, NVT);
     }
 
@@ -875,7 +876,7 @@
     //  <4 x i140> -> <2 x i140>
     if (LK.first == TypeExpandInteger)
       return LegalizeKind(TypeSplitVector,
-                          EVT::getVectorVT(Context, EltVT, NumElts / 2));
+                          EVT::getVectorVT(Context, EltVT, EltCnt / 2));
 
     // Promote the integer element types until a legal vector type is found
     // or until the element integer type is too big. If a legal type was not
@@ -896,11 +897,11 @@
         break;
 
       // Build a new vector type and check if it is legal.
-      MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+      MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), EltCnt);
       // Found a legal promoted vector type.
       if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
         return LegalizeKind(TypePromoteInteger,
-                            EVT::getVectorVT(Context, EltVT, NumElts));
+                            EVT::getVectorVT(Context, EltVT, EltCnt));
     }
 
     // Reset the type to the unexpanded type if we did not find a legal vector
@@ -912,14 +913,14 @@
   // If there is no wider legal type, split the vector.
   while (true) {
     // Round up to the next power of 2.
-    NumElts = (unsigned)NextPowerOf2(NumElts);
+    EltCnt.Min = (unsigned)NextPowerOf2(EltCnt.Min);
 
     // If there is no simple vector type with this many elements then there
     // cannot be a larger legal vector type.  Note that this assumes that
     // there are no skipped intermediate vector types in the simple types.
     if (!EltVT.isSimple())
       break;
-    MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+    MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), EltCnt);
     if (LargerVector == MVT())
       break;
 
@@ -935,7 +936,7 @@
   }
 
   // Vectors with illegal element types are expanded.
-  EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2);
+  EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorElementCount() / 2);
   return LegalizeKind(TypeSplitVector, NVT);
 }
 
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2614,9 +2614,11 @@
   SDLoc DL(N);
   GetSplitVector(N->getOperand(0), Lo0, Hi0);
   GetSplitVector(N->getOperand(1), Lo1, Hi1);
-  unsigned PartElements = Lo0.getValueType().getVectorNumElements();
-  EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
-  EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+  auto PartEltCnt = Lo0.getValueType().getVectorElementCount();
+
+  LLVMContext &Context = *DAG.getContext();
+  EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
+  EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
 
   LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
   HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D79167: [SVE][CodeGen] Legalise scalable vector types for vsetcc & vselect

Reply via email to