craig.topper created this revision.
craig.topper added reviewers: bsmith, sdesmalen, c-rhodes, joechrisellis.
Herald added subscribers: jobnoorman, luke, VincentWu, vkmr, frasercrmck, 
luismarques, apazos, sameer.abuasal, s.egerton, Jim, benna, psnobl, jocewei, 
PkmX, the_o, brucehoult, MartinMosbeck, rogfer01, edward-jones, zzheng, jrtc27, 
shiva0217, kito-cheng, niosHD, sabuasal, simoncook, johnrusso, rbar, asb, 
kristof.beyls, arichardson.
Herald added a project: All.
craig.topper requested review of this revision.
Herald added subscribers: wangpc, alextsao1999, eopXD.
Herald added a project: clang.

This improves the codegen for calling a function that returns a
VLST type. Previously we stored to an alloca using the scalable
type, but loaded it using the fixed vector type. The middle end is
unable to optimize away that store/load pair. With this patch we now
store using the fixed vector type which matches the load.

I have not added predicate types because they are not yet supported on
RISC-V, so no problem has been observed with them so far.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D155222

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c


Index: clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
===================================================================
--- clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
+++ clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
@@ -38,11 +38,7 @@
 
 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[COERCE1:%.*]] = alloca <8 x i32>, align 8
-// CHECK-NEXT:    store <vscale x 2 x i32> [[X:%.*]], ptr [[COERCE1]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[COERCE1]], align 8, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP0]], i64 0)
-// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE2]]
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[X:%.*]]
 //
 vint32m1_t sizeless_caller(vint32m1_t x) {
   return fixed_callee(x);
Index: clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
===================================================================
--- clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -41,11 +41,7 @@
 
 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[COERCE1:%.*]] = alloca <16 x i32>, align 16
-// CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[COERCE1]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[COERCE1]], align 16, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE2]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[X:%.*]]
 //
 svint32_t sizeless_caller(svint32_t x) {
   return fixed_callee(x);
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1390,6 +1390,19 @@
     return;
   }
 
+  // If coercing a fixed vector from a scalable vector for ABI compatibility,
+  // and the types match, use the llvm.vector.extract intrinsic to perform the
+  // conversion.
+  if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+    if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+      if (FixedDst->getElementType() == ScalableSrc->getElementType()) {
+        auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+        Src = CGF.Builder.CreateExtractVector(DstTy, Src, Zero, "cast.fixed");
+        CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+        return;
+      }
+    }
+  }
   llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
 
   // If store is legal, just bitcast the src pointer.


Index: clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
===================================================================
--- clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
+++ clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
@@ -38,11 +38,7 @@
 
 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[COERCE1:%.*]] = alloca <8 x i32>, align 8
-// CHECK-NEXT:    store <vscale x 2 x i32> [[X:%.*]], ptr [[COERCE1]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[COERCE1]], align 8, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP0]], i64 0)
-// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE2]]
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[X:%.*]]
 //
 vint32m1_t sizeless_caller(vint32m1_t x) {
   return fixed_callee(x);
Index: clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
===================================================================
--- clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -41,11 +41,7 @@
 
 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[COERCE1:%.*]] = alloca <16 x i32>, align 16
-// CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[COERCE1]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[COERCE1]], align 16, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE2]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[X:%.*]]
 //
 svint32_t sizeless_caller(svint32_t x) {
   return fixed_callee(x);
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1390,6 +1390,19 @@
     return;
   }
 
+  // If coercing a fixed vector from a scalable vector for ABI compatibility,
+  // and the types match, use the llvm.vector.extract intrinsic to perform the
+  // conversion.
+  if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+    if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+      if (FixedDst->getElementType() == ScalableSrc->getElementType()) {
+        auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+        Src = CGF.Builder.CreateExtractVector(DstTy, Src, Zero, "cast.fixed");
+        CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+        return;
+      }
+    }
+  }
   llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
 
   // If store is legal, just bitcast the src pointer.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to