================
@@ -176,6 +172,157 @@ ArrayAttr updateResAttrs(MLIRContext *ctx, ArrayAttr 
existingResAttrs,
   return ArrayAttr::get(ctx, {DictionaryAttr::get(ctx, attrs)});
 }
 
+/// Coerce \p src to type \p dstTy at the current builder insertion point.
+///
+/// Three strategies, in order of preference:
+///   - If src and dst are the same type, return src unchanged and leave
+///     \p createdOps empty.
+///   - If both are non-aggregate same-bit-width values that just differ in
+///     vector-vs-scalar shape (e.g. !cir.vector<2 x !cir.float> ↔
+///     !cir.complex<!cir.float>), use cir.reinterpret_cast which is free at
+///     the IR level.
+///   - Otherwise go through memory: allocate a slot of the source type
+///     (using max(srcAlign, dstAlign) for the alloca alignment), store
+///     the source, bitcast the pointer to the destination type, load the
+///     destination type back.
+///
+/// The temporary alloca is placed at the start of the enclosing function's
+/// entry block so that it composes correctly with the HoistAllocas pass
+/// regardless of pipeline ordering.
+///
+/// Any operations the helper creates are appended to \p createdOps so the
+/// caller can pass them to replaceAllUsesExcept and avoid clobbering the
+/// store's value operand when later rewiring the source value.
+Value emitCoercion(OpBuilder &rewriter, Location loc, Type dstTy, Value src,
+                   FunctionOpInterface funcOp, const DataLayout &dl,
+                   SmallPtrSetImpl<Operation *> &createdOps) {
+  Type srcTy = src.getType();
+  if (srcTy == dstTy)
+    return src;
+
+  // Reinterpret path: same total bit width, neither side is a record, and
+  // the shapes differ only in vector-vs-non-vector.  Going through memory
+  // is wasteful for these — they have the same in-register representation.
+  bool isAggregate = isa<cir::RecordType>(srcTy) || 
isa<cir::RecordType>(dstTy);
+  bool vectorMismatch =
+      isa<cir::VectorType>(srcTy) != isa<cir::VectorType>(dstTy);
+  if (!isAggregate && vectorMismatch &&
+      dl.getTypeSizeInBits(srcTy) == dl.getTypeSizeInBits(dstTy)) {
+    auto reinterpret =
+        cir::ReinterpretCastOp::create(rewriter, loc, dstTy, src);
+    createdOps.insert(reinterpret);
+    return reinterpret;
+  }
+
+  // Memory path: alloca + store + ptr-cast + load.  The alloca goes in the
+  // entry block (Andy's review comment #3 on the original PR), with
+  // alignment = max(srcAlign, dstAlign) to satisfy both the store and the
+  // load (review comment #1).
+  uint64_t srcAlign = dl.getTypeABIAlignment(srcTy);
+  uint64_t dstAlign = dl.getTypeABIAlignment(dstTy);
+  uint64_t allocaAlign = std::max(srcAlign, dstAlign);
+
+  auto srcPtrTy = cir::PointerType::get(srcTy);
+  auto dstPtrTy = cir::PointerType::get(dstTy);
+
+  cir::AllocaOp alloca;
+  {
+    OpBuilder::InsertionGuard guard(rewriter);
+    Block &entry = funcOp->getRegion(0).front();
+    rewriter.setInsertionPointToStart(&entry);
+    alloca = cir::AllocaOp::create(rewriter, loc, srcPtrTy, srcTy,
----------------
andykaylor wrote:

I was thinking about this, and I think that, if it is possible at all, the most
you'd get is an argument that is coerced to a smaller size, so the store and
load are still safe. While trying to reproduce this, I found something
interesting in the way classic codegen emits code for coerced structs:

https://godbolt.org/z/fbshaffeG

Notice that the coerced argument is stored, but only the element that is
accessed is reloaded. We should keep an eye on how CIR handles that. I don't
think it's necessary to reproduce that exact behavior for this PR, though.

https://github.com/llvm/llvm-project/pull/195879
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to