================
@@ -176,6 +172,157 @@ ArrayAttr updateResAttrs(MLIRContext *ctx, ArrayAttr
existingResAttrs,
return ArrayAttr::get(ctx, {DictionaryAttr::get(ctx, attrs)});
}
+/// Coerce \p src to type \p dstTy at the current builder insertion point.
+///
+/// Three strategies, in order of preference:
+/// - If src and dst are the same type, return src unchanged and leave
+/// \p createdOps unmodified.
+/// - If both are non-aggregate same-bit-width values that just differ in
+/// vector-vs-scalar shape (e.g. !cir.vector<2 x !cir.float> ↔
+/// !cir.complex<!cir.float>), use cir.reinterpret_cast which is free at
+/// the IR level.
+/// - Otherwise go through memory: allocate a slot of the source type
+/// (using max(srcAlign, dstAlign) for the alloca alignment), store
+/// the source, bitcast the pointer to the destination type, load the
+/// destination type back.
+///
+/// The temporary alloca is placed at the start of the enclosing function's
+/// entry block so that it composes correctly with the HoistAllocas pass
+/// regardless of pipeline ordering.
+///
+/// Any operations the helper creates are appended to \p createdOps so the
+/// caller can pass them to replaceAllUsesExcept and avoid clobbering the
+/// store's value operand when later rewiring the source value.
+Value emitCoercion(OpBuilder &rewriter, Location loc, Type dstTy, Value src,
+ FunctionOpInterface funcOp, const DataLayout &dl,
+ SmallPtrSetImpl<Operation *> &createdOps) {
+ Type srcTy = src.getType();
+ if (srcTy == dstTy)
+ return src;
+
+ // Reinterpret path: same total bit width, neither side is a record, and
+ // the shapes differ only in vector-vs-non-vector. Going through memory
+ // is wasteful for these — they have the same in-register representation.
+ bool isAggregate = isa<cir::RecordType>(srcTy) ||
isa<cir::RecordType>(dstTy);
+ bool vectorMismatch =
+ isa<cir::VectorType>(srcTy) != isa<cir::VectorType>(dstTy);
+ if (!isAggregate && vectorMismatch &&
+ dl.getTypeSizeInBits(srcTy) == dl.getTypeSizeInBits(dstTy)) {
+ auto reinterpret =
+ cir::ReinterpretCastOp::create(rewriter, loc, dstTy, src);
+ createdOps.insert(reinterpret);
+ return reinterpret;
+ }
+
+ // Memory path: alloca + store + ptr-cast + load. The alloca goes in the
+ // entry block (Andy's review comment #3 on the original PR), with
+ // alignment = max(srcAlign, dstAlign) to satisfy both the store and the
+ // load (review comment #1).
+ uint64_t srcAlign = dl.getTypeABIAlignment(srcTy);
+ uint64_t dstAlign = dl.getTypeABIAlignment(dstTy);
+ uint64_t allocaAlign = std::max(srcAlign, dstAlign);
+
+ auto srcPtrTy = cir::PointerType::get(srcTy);
+ auto dstPtrTy = cir::PointerType::get(dstTy);
+
+ cir::AllocaOp alloca;
+ {
+ OpBuilder::InsertionGuard guard(rewriter);
+ Block &entry = funcOp->getRegion(0).front();
+ rewriter.setInsertionPointToStart(&entry);
+ alloca = cir::AllocaOp::create(rewriter, loc, srcPtrTy, srcTy,
----------------
andykaylor wrote:
I was thinking about this, and I think that, if it is possible, the most you'd
get is an argument that is coerced to a smaller size, so the store and load are
still safe. Trying to reproduce this, I found something interesting in the way
classic codegen emits code for coerced structs:
https://godbolt.org/z/fbshaffeG
Notice that the coerced argument is stored, but only the element that is
accessed is reloaded. We should keep an eye on how CIR handles that. I don't
think it's necessary to reproduce that exact behavior for this PR, though.
https://github.com/llvm/llvm-project/pull/195879
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits