https://github.com/AmrDeveloper updated https://github.com/llvm/llvm-project/pull/153796
>From 6613d4c0d735461a0b5ef04da544f6ddf3f38541 Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Wed, 13 Aug 2025 20:22:38 +0200 Subject: [PATCH 1/6] [CIR] Upstream DivOp for ComplexType --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 40 ++- clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp | 60 +++- clang/lib/CIR/CodeGen/CIRGenFunction.h | 2 + .../Dialect/Transforms/LoweringPrepare.cpp | 176 ++++++++- clang/test/CIR/CodeGen/complex-mul-div.cpp | 336 +++++++++++++++++- 5 files changed, 602 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index a77e9199cdc96..4ccc5b1f24a5d 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2966,7 +2966,7 @@ def CIR_ComplexSubOp : CIR_Op<"complex.sub", [ } //===----------------------------------------------------------------------===// -// ComplexMulOp +// ComplexMulOp & ComplexDivOp //===----------------------------------------------------------------------===// def CIR_ComplexRangeKind : CIR_I32EnumAttr< @@ -3013,6 +3013,44 @@ def CIR_ComplexMulOp : CIR_Op<"complex.mul", [ }]; } +def CIR_ComplexDivOp : CIR_Op<"complex.div", [ + Pure, SameOperandsAndResultType +]> { + let summary = "Complex division"; + let description = [{ + The `cir.complex.div` operation takes two complex numbers and returns + their division. + + Range is used to select the implementation used when the operation + is lowered to the LLVM dialect. For division, 'improved' and + 'promoted' are all handled equivalently, producing the + Smith's algorithms for Complex division. If 'full' is used, + a runtime-library function is called if one of the intermediate + calculations produced a NaN value, and for 'basic' algebraic formula with + no special handling for NaN value will be used. + + Example: + + ```mlir + %2 = cir.complex.div %0, %1 range(basic) : !cir.complex<!cir.float> + %2 = cir.complex.div %0, %1 range(full) : !cir.complex<!cir.float> + ``` + }]; + + let arguments = (ins + CIR_ComplexType:$lhs, + CIR_ComplexType:$rhs, + CIR_ComplexRangeKind:$range, + UnitAttr:$promoted + ); + + let results = (outs CIR_ComplexType:$result); + + let assemblyFormat = [{ + $lhs `,` $rhs `range` `(` $range `)` `:` qualified(type($result)) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // Bit Manipulation Operations //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index 85cd0282ffc2a..b1afab398d7f4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -10,6 +10,7 @@ namespace { class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> { CIRGenFunction &cgf; CIRGenBuilderTy &builder; + bool fpHasBeenPromoted = false; public: explicit ComplexExprEmitter(CIRGenFunction &cgf) @@ -128,6 +129,35 @@ class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> { mlir::Value emitBinAdd(const BinOpInfo &op); mlir::Value emitBinSub(const BinOpInfo &op); mlir::Value emitBinMul(const BinOpInfo &op); + mlir::Value emitBinDiv(const BinOpInfo &op); + + QualType higherPrecisionTypeForComplexArithmetic(QualType elementType, + bool isDivOpCode) { + ASTContext &astContext = cgf.getContext(); + const QualType higherElementType = + astContext.GetHigherPrecisionFPType(elementType); + const llvm::fltSemantics &elementTypeSemantics = + astContext.getFloatTypeSemantics(elementType); + const llvm::fltSemantics &higherElementTypeSemantics = + astContext.getFloatTypeSemantics(higherElementType); + + // Check that the promoted type can handle the intermediate values without + // overflowing. This can be interpreted as: + // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal) * 2 <= + // LargerType.LargestFiniteVal. + // In terms of exponent it gives this formula: + // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal + // doubles the exponent of SmallerType.LargestFiniteVal) + if (llvm::APFloat::semanticsMaxExponent(elementTypeSemantics) * 2 + 1 <= + llvm::APFloat::semanticsMaxExponent(higherElementTypeSemantics)) { + fpHasBeenPromoted = true; + return astContext.getComplexType(higherElementType); + } + + // The intermediate values can't be represented in the promoted type + // without overflowing. + return QualType(); + } QualType getPromotionType(QualType ty, bool isDivOpCode = false) { if (auto *complexTy = ty->getAs<ComplexType>()) { @@ -135,8 +165,7 @@ class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> { if (isDivOpCode && elementTy->isFloatingType() && cgf.getLangOpts().getComplexRange() == LangOptions::ComplexRangeKind::CX_Promoted) { - cgf.cgm.errorNYI("HigherPrecisionTypeForComplexArithmetic"); - return QualType(); + return higherPrecisionTypeForComplexArithmetic(elementTy, isDivOpCode); } if (elementTy.UseExcessPrecision(cgf.getContext())) @@ -154,13 +183,14 @@ class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> { e->getType(), e->getOpcode() == BinaryOperatorKind::BO_Div); \ mlir::Value result = emitBin##OP(emitBinOps(e, promotionTy)); \ if (!promotionTy.isNull()) \ - cgf.cgm.errorNYI("Binop emitUnPromotedValue"); \ + result = cgf.emitUnPromotedValue(result, e->getType()); \ return result; \ } HANDLEBINOP(Add) HANDLEBINOP(Sub) HANDLEBINOP(Mul) + HANDLEBINOP(Div) #undef HANDLEBINOP // Compound assignments. @@ -858,6 +888,22 @@ mlir::Value ComplexExprEmitter::emitBinMul(const BinOpInfo &op) { return builder.createComplexCreate(op.loc, newReal, newImag); } +mlir::Value ComplexExprEmitter::emitBinDiv(const BinOpInfo &op) { + assert(!cir::MissingFeatures::fastMathFlags()); + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + + if (mlir::isa<cir::ComplexType>(op.lhs.getType()) && + mlir::isa<cir::ComplexType>(op.rhs.getType())) { + cir::ComplexRangeKind rangeKind = + getComplexRangeAttr(op.fpFeatures.getComplexRange()); + return builder.create<cir::ComplexDivOp>(op.loc, op.lhs, op.rhs, rangeKind, + fpHasBeenPromoted); + } + + cgf.cgm.errorNYI("ComplexExprEmitter::emitBinMu between Complex & Scalar"); + return {}; +} + LValue CIRGenFunction::emitComplexAssignmentLValue(const BinaryOperator *e) { assert(e->getOpcode() == BO_Assign && "Expected assign op"); @@ -954,6 +1000,14 @@ mlir::Value CIRGenFunction::emitPromotedValue(mlir::Value result, convertType(promotionType)); } +mlir::Value CIRGenFunction::emitUnPromotedValue(mlir::Value result, + QualType unPromotionType) { + assert(!mlir::cast<cir::ComplexType>(result.getType()).isIntegerComplex() && + "integral complex will never be promoted"); + return builder.createCast(cir::CastKind::float_complex, result, + convertType(unPromotionType)); +} + LValue CIRGenFunction::emitScalarCompoundAssignWithComplex( const CompoundAssignOperator *e, mlir::Value &result) { CompoundFunc op = getComplexOp(e->getOpcode()); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index ddc1edd77010c..6f49a2a25b6b4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1302,6 +1302,8 @@ class CIRGenFunction : public CIRGenTypeCache { LValue emitUnaryOpLValue(const clang::UnaryOperator *e); + mlir::Value emitUnPromotedValue(mlir::Value result, QualType unPromotionType); + /// Emit a reached-unreachable diagnostic if \p loc is valid and runtime /// checking is enabled. Otherwise, just emit an unreachable instruction. /// \p createNewBlock indicates whether to create a new block for the IR diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 66260eb36e002..676b6bfbdb456 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -8,7 +8,6 @@ #include "PassDetail.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/CharUnits.h" #include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" @@ -27,6 +26,7 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { void runOnOp(mlir::Operation *op); void lowerCastOp(cir::CastOp op); + void lowerComplexDivOp(cir::ComplexDivOp op); void lowerComplexMulOp(cir::ComplexMulOp op); void lowerUnaryOp(cir::UnaryOp op); void lowerArrayDtor(cir::ArrayDtor op); @@ -181,6 +181,176 @@ static mlir::Value buildComplexBinOpLibCall( return call.getResult(); } +static llvm::StringRef +getComplexDivLibCallName(llvm::APFloat::Semantics semantics) { + switch (semantics) { + case llvm::APFloat::S_IEEEhalf: + return "__divhc3"; + case llvm::APFloat::S_IEEEsingle: + return "__divsc3"; + case llvm::APFloat::S_IEEEdouble: + return "__divdc3"; + case llvm::APFloat::S_PPCDoubleDouble: + return "__divtc3"; + case llvm::APFloat::S_x87DoubleExtended: + return "__divxc3"; + case llvm::APFloat::S_IEEEquad: + return "__divtc3"; + default: + llvm_unreachable("unsupported floating point type"); + } +} + +static mlir::Value +buildAlgebraicComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, + mlir::Value lhsReal, mlir::Value lhsImag, + mlir::Value rhsReal, mlir::Value rhsImag) { + // (a+bi) / (c+di) = ((ac+bd)/(cc+dd)) + ((bc-ad)/(cc+dd))i + mlir::Value &a = lhsReal; + mlir::Value &b = lhsImag; + mlir::Value &c = rhsReal; + mlir::Value &d = rhsImag; + + mlir::Value ac = builder.createBinop(loc, a, cir::BinOpKind::Mul, c); // a*c + mlir::Value bd = builder.createBinop(loc, b, cir::BinOpKind::Mul, d); // b*d + mlir::Value cc = builder.createBinop(loc, c, cir::BinOpKind::Mul, c); // c*c + mlir::Value dd = builder.createBinop(loc, d, cir::BinOpKind::Mul, d); // d*d + mlir::Value acbd = + builder.createBinop(loc, ac, cir::BinOpKind::Add, bd); // ac+bd + mlir::Value ccdd = + builder.createBinop(loc, cc, cir::BinOpKind::Add, dd); // cc+dd + mlir::Value resultReal = + builder.createBinop(loc, acbd, cir::BinOpKind::Div, ccdd); + + mlir::Value bc = builder.createBinop(loc, b, cir::BinOpKind::Mul, c); // b*c + mlir::Value ad = builder.createBinop(loc, a, cir::BinOpKind::Mul, d); // a*d + mlir::Value bcad = + builder.createBinop(loc, bc, cir::BinOpKind::Sub, ad); // bc-ad + mlir::Value resultImag = + builder.createBinop(loc, bcad, cir::BinOpKind::Div, ccdd); + return builder.createComplexCreate(loc, resultReal, resultImag); +} + +static mlir::Value +buildRangeReductionComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, + mlir::Value lhsReal, mlir::Value lhsImag, + mlir::Value rhsReal, mlir::Value rhsImag) { + // Implements Smith's algorithm for complex division. + // SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + + // Let: + // - lhs := a+bi + // - rhs := c+di + // - result := lhs / rhs = e+fi + // + // The algorithm pseudocode looks like follows: + // if fabs(c) >= fabs(d): + // r := d / c + // tmp := c + r*d + // e = (a + b*r) / tmp + // f = (b - a*r) / tmp + // else: + // r := c / d + // tmp := d + r*c + // e = (a*r + b) / tmp + // f = (b*r - a) / tmp + + mlir::Value &a = lhsReal; + mlir::Value &b = lhsImag; + mlir::Value &c = rhsReal; + mlir::Value &d = rhsImag; + + auto trueBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) { + mlir::Value r = builder.createBinop(loc, d, cir::BinOpKind::Div, + c); // r := d / c + mlir::Value rd = builder.createBinop(loc, r, cir::BinOpKind::Mul, d); // r*d + mlir::Value tmp = builder.createBinop(loc, c, cir::BinOpKind::Add, + rd); // tmp := c + r*d + + mlir::Value br = builder.createBinop(loc, b, cir::BinOpKind::Mul, r); // b*r + mlir::Value abr = + builder.createBinop(loc, a, cir::BinOpKind::Add, br); // a + b*r + mlir::Value e = builder.createBinop(loc, abr, cir::BinOpKind::Div, tmp); + + mlir::Value ar = builder.createBinop(loc, a, cir::BinOpKind::Mul, r); // a*r + mlir::Value bar = + builder.createBinop(loc, b, cir::BinOpKind::Sub, ar); // b - a*r + mlir::Value f = builder.createBinop(loc, bar, cir::BinOpKind::Div, tmp); + + mlir::Value result = builder.createComplexCreate(loc, e, f); + builder.createYield(loc, result); + }; + + auto falseBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) { + mlir::Value r = builder.createBinop(loc, c, cir::BinOpKind::Div, + d); // r := c / d + mlir::Value rc = builder.createBinop(loc, r, cir::BinOpKind::Mul, c); // r*c + mlir::Value tmp = builder.createBinop(loc, d, cir::BinOpKind::Add, + rc); // tmp := d + r*c + + mlir::Value ar = builder.createBinop(loc, a, cir::BinOpKind::Mul, r); // a*r + mlir::Value arb = + builder.createBinop(loc, ar, cir::BinOpKind::Add, b); // a*r + b + mlir::Value e = builder.createBinop(loc, arb, cir::BinOpKind::Div, tmp); + + mlir::Value br = builder.createBinop(loc, b, cir::BinOpKind::Mul, r); // b*r + mlir::Value bra = + builder.createBinop(loc, br, cir::BinOpKind::Sub, a); // b*r - a + mlir::Value f = builder.createBinop(loc, bra, cir::BinOpKind::Div, tmp); + + mlir::Value result = builder.createComplexCreate(loc, e, f); + builder.createYield(loc, result); + }; + + auto cFabs = builder.create<cir::FAbsOp>(loc, c); + auto dFabs = builder.create<cir::FAbsOp>(loc, d); + cir::CmpOp cmpResult = + builder.createCompare(loc, cir::CmpOpKind::ge, cFabs, dFabs); + auto ternary = builder.create<cir::TernaryOp>( + loc, cmpResult, trueBranchBuilder, falseBranchBuilder); + + return ternary.getResult(); +} + +static mlir::Value lowerComplexDiv(LoweringPreparePass &pass, + CIRBaseBuilderTy &builder, + mlir::Location loc, cir::ComplexDivOp op, + mlir::Value lhsReal, mlir::Value lhsImag, + mlir::Value rhsReal, mlir::Value rhsImag) { + cir::ComplexType complexTy = op.getType(); + if (mlir::isa<cir::FPTypeInterface>(complexTy.getElementType())) { + cir::ComplexRangeKind range = op.getRange(); + if (range == cir::ComplexRangeKind::Improved || + (range == cir::ComplexRangeKind::Promoted && !op.getPromoted())) + return buildRangeReductionComplexDiv(builder, loc, lhsReal, lhsImag, + rhsReal, rhsImag); + if (range == cir::ComplexRangeKind::Full) + return buildComplexBinOpLibCall(pass, builder, &getComplexDivLibCallName, + loc, complexTy, lhsReal, lhsImag, rhsReal, + rhsImag); + } + + return buildAlgebraicComplexDiv(builder, loc, lhsReal, lhsImag, rhsReal, + rhsImag); +} + +void LoweringPreparePass::lowerComplexDivOp(cir::ComplexDivOp op) { + cir::CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op); + mlir::Location loc = op.getLoc(); + mlir::TypedValue<cir::ComplexType> lhs = op.getLhs(); + mlir::TypedValue<cir::ComplexType> rhs = op.getRhs(); + mlir::Value lhsReal = builder.createComplexReal(loc, lhs); + mlir::Value lhsImag = builder.createComplexImag(loc, lhs); + mlir::Value rhsReal = builder.createComplexReal(loc, rhs); + mlir::Value rhsImag = builder.createComplexImag(loc, rhs); + + mlir::Value loweredResult = lowerComplexDiv(*this, builder, loc, op, lhsReal, + lhsImag, rhsReal, rhsImag); + op.replaceAllUsesWith(loweredResult); + op.erase(); +} + static llvm::StringRef getComplexMulLibCallName(llvm::APFloat::Semantics semantics) { switch (semantics) { @@ -412,6 +582,8 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) { lowerArrayDtor(arrayDtor); else if (auto cast = mlir::dyn_cast<cir::CastOp>(op)) lowerCastOp(cast); + else if (auto complexDiv = mlir::dyn_cast<cir::ComplexDivOp>(op)) + lowerComplexDivOp(complexDiv); else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) lowerComplexMulOp(complexMul); else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op)) @@ -427,7 +599,7 @@ void LoweringPreparePass::runOnOperation() { op->walk([&](mlir::Operation *op) { if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp, - cir::ComplexMulOp, cir::UnaryOp>(op)) + cir::ComplexMulOp, cir::ComplexDivOp, cir::UnaryOp>(op)) opsToTransform.push_back(op); }); diff --git a/clang/test/CIR/CodeGen/complex-mul-div.cpp b/clang/test/CIR/CodeGen/complex-mul-div.cpp index 633080577092c..aa44b72e6aaa3 100644 --- a/clang/test/CIR/CodeGen/complex-mul-div.cpp +++ b/clang/test/CIR/CodeGen/complex-mul-div.cpp @@ -3,27 +3,27 @@ // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=basic -Wno-unused-value -fclangir -emit-cir %s -o %t.cir // RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED,CIR-COMBINED // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=basic -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll -// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED,LLVM-COMBINED +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED,LLVM-COMBINED,LLVM-BASIC // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=basic -Wno-unused-value -emit-llvm %s -o %t.ll -// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-INT,OGCG-MUL-COMBINED,OGCG-COMBINED +// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-INT,OGCG-MUL-COMBINED,OGCG-COMBINED,OGCG-BASIC // complex-range improved // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=improved -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-IMPROVED %s // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=improved -Wno-unused-value -fclangir -emit-cir %s -o %t.cir // RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED,CIR-COMBINED // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=improved -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll -// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED,LLVM-COMBINED +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED,LLVM-COMBINED,LLVM-IMPROVED // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=improved -Wno-unused-value -emit-llvm %s -o %t.ll -// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-INT,OGCG-MUL-COMBINED,OGCG-COMBINED +// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-INT,OGCG-MUL-COMBINED,OGCG-COMBINED,OGCG-IMPROVED // complex-range promoted // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=promoted -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-PROMOTED %s // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=promoted -Wno-unused-value -fclangir -emit-cir %s -o %t.cir // RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED,CIR-COMBINED // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=promoted -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll -// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED,LLVM-COMBINED +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED,LLVM-COMBINED,LLVM-PROMOTED // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=promoted -Wno-unused-value -emit-llvm %s -o %t.ll -// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-INT,OGCG-MUL-COMBINED,OGCG-COMBINED +// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-INT,OGCG-MUL-COMBINED,OGCG-COMBINED,OGCG-PROMOTED // complex-range full // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=full -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-FULL %s @@ -324,3 +324,327 @@ void foo2() { // OGCG-COMBINED: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1 // OGCG-COMBINED: store float %[[RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4 // OGCG-COMBINED: store float %[[RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4 + +void foo3() { + float _Complex a; + float _Complex b; + float _Complex c = a / b; +} + +// CIR-BEFORE-BASIC: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(basic) : !cir.complex<!cir.float> + +// LLVM-BASIC: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-BASIC: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-BASIC: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-BASIC: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4 +// LLVM-BASIC: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4 +// LLVM-BASIC: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0 +// LLVM-BASIC: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1 +// LLVM-BASIC: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0 +// LLVM-BASIC: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1 +// LLVM-BASIC: %[[MUL_AR_BR:.*]] = fmul float %[[A_REAL]], %[[B_REAL]] +// LLVM-BASIC: %[[MUL_AI_BI:.*]] = fmul float %[[A_IMAG]], %[[B_IMAG]] +// LLVM-BASIC: %[[MUL_BR_BR:.*]] = fmul float %[[B_REAL]], %[[B_REAL]] +// LLVM-BASIC: %[[MUL_BI_BI:.*]] = fmul float %[[B_IMAG]], %[[B_IMAG]] +// LLVM-BASIC: %[[ADD_ARBR_AIBI:.*]] = fadd float %[[MUL_AR_BR]], %[[MUL_AI_BI]] +// LLVM-BASIC: %[[ADD_BRBR_BIBI:.*]] = fadd float %[[MUL_BR_BR]], %[[MUL_BI_BI]] +// LLVM-BASIC: %[[RESULT_REAL:.*]] = fdiv float %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]] +// LLVM-BASIC: %[[MUL_AI_BR:.*]] = fmul float %[[A_IMAG]], %[[B_REAL]] +// LLVM-BASIC: %[[MUL_BR_BI:.*]] = fmul float %[[A_REAL]], %[[B_IMAG]] +// LLVM-BASIC: %[[SUB_AIBR_BRBI:.*]] = fsub float %[[MUL_AI_BR]], %[[MUL_BR_BI]] +// LLVM-BASIC: %[[RESULT_IMAG:.*]] = fdiv float %[[SUB_AIBR_BRBI]], %[[ADD_BRBR_BIBI]] +// LLVM-BASIC: %[[TMP_RESULT:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL]], 0 +// LLVM-BASIC: %[[RESULT:.*]] = insertvalue { float, float } %[[TMP_RESULT]], float %[[RESULT_IMAG]], 1 +// LLVM-BASIC: store { float, float } %[[RESULT]], ptr %[[C_ADDR]], align 4 + +// OGCG-BASIC: %[[A_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-BASIC: %[[B_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-BASIC: %[[C_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-BASIC: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG-BASIC: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4 +// OGCG-BASIC: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG-BASIC: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4 +// OGCG-BASIC: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0 +// OGCG-BASIC: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4 +// OGCG-BASIC: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1 +// OGCG-BASIC: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4 +// OGCG-BASIC: %[[MUL_AR_BR:.*]] = fmul float %[[A_REAL]], %[[B_REAL]] +// OGCG-BASIC: %[[MUL_AI_BI:.*]] = fmul float %[[A_IMAG]], %[[B_IMAG]] +// OGCG-BASIC: %[[ADD_ARBR_AIBI:.*]] = fadd float %[[MUL_AR_BR]], %[[MUL_AI_BI]] +// OGCG-BASIC: %[[MUL_BR_BR:.*]] = fmul float %[[B_REAL]], %[[B_REAL]] +// OGCG-BASIC: %[[MUL_BI_BI:.*]] = fmul float %[[B_IMAG]], %[[B_IMAG]] +// OGCG-BASIC: %[[ADD_BRBR_BIBI:.*]] = fadd float %[[MUL_BR_BR]], %[[MUL_BI_BI]] +// OGCG-BASIC: %[[MUL_AI_BR:.*]] = fmul float %[[A_IMAG]], %[[B_REAL]] +// OGCG-BASIC: %[[MUL_AR_BI:.*]] = fmul float %[[A_REAL]], %[[B_IMAG]] +// OGCG-BASIC: %[[SUB_AIBR_BRBI:.*]] = fsub float %[[MUL_AI_BR]], %[[MUL_AR_BI]] +// OGCG-BASIC: %[[RESULT_REAL:.*]] = fdiv float %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]] +// OGCG-BASIC: %[[RESULT_IMAG:.*]] = fdiv float %[[SUB_AIBR_BRBI]], %[[ADD_BRBR_BIBI]] +// OGCG-BASIC: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0 +// OGCG-BASIC: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1 +// OGCG-BASIC: store float %[[RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4 +// OGCG-BASIC: store float %[[RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4 + +// CIR-BEFORE-IMPROVED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(improved) : !cir.complex<!cir.float> + +// LLVM-IMPROVED: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-IMPROVED: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-IMPROVED: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-IMPROVED: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4 +// LLVM-IMPROVED: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4 +// LLVM-IMPROVED: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0 +// LLVM-IMPROVED: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1 +// LLVM-IMPROVED: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0 +// LLVM-IMPROVED: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1 +// LLVM-IMPROVED: %[[ABS_B_REAL:.*]] = call float @llvm.fabs.f32(float %[[B_REAL]]) +// LLVM-IMPROVED: %[[ABS_B_IMAG:.*]] = call float @llvm.fabs.f32(float %[[B_IMAG]]) +// LLVM-IMPROVED: %[[ABS_B_CMP:.*]] = fcmp oge float %[[ABS_B_REAL]], %[[ABS_B_IMAG]] +// LLVM-IMPROVED: br i1 %[[ABS_B_CMP]], label %[[ABS_BR_GT_ABS_BI:.*]], label %[[ABS_BR_LT_ABS_BI:.*]] +// LLVM-IMPROVED: [[ABS_BR_GT_ABS_BI]]: +// LLVM-IMPROVED: %[[DIV_BI_BR:.*]] = fdiv float %[[B_IMAG]], %[[B_REAL]] +// LLVM-IMPROVED: %[[MUL_DIV_BIBR_BI:.*]] = fmul float %[[DIV_BI_BR]], %[[B_IMAG]] +// LLVM-IMPROVED: %[[ADD_BR_MUL_DIV_BIBR_BI:.*]] = fadd float %[[B_REAL]], %[[MUL_DIV_BIBR_BI]] +// LLVM-IMPROVED: %[[MUL_AI_DIV_BIBR:.*]] = fmul float %[[A_IMAG]], %[[DIV_BI_BR]] +// LLVM-IMPROVED: %[[ADD_AR_MUL_AI_DIV_BIBR:.*]] = fadd float %[[A_REAL]], %[[MUL_AI_DIV_BIBR]] +// LLVM-IMPROVED: %[[RESULT_REAL:.*]] = fdiv float %[[ADD_AR_MUL_AI_DIV_BIBR]], %16 +// LLVM-IMPROVED: %[[MUL_AR_DIV_BI_BR:.*]] = fmul float %[[A_REAL]], %[[DIV_BI_BR]] +// LLVM-IMPROVED: %[[SUB_AI_MUL_AR_DIV_BIBR:.*]] = fsub float %[[A_IMAG]], %[[MUL_AR_DIV_BI_BR]] +// LLVM-IMPROVED: %[[RESULT_IMAG:.*]] = fdiv float %[[SUB_AI_MUL_AR_DIV_BIBR]], %[[ADD_BR_MUL_DIV_BIBR_BI]] +// LLVM-IMPROVED: %[[TMP_THEN_RESULT:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL]], 0 +// LLVM-IMPROVED: %[[THEN_RESULT:.*]] = insertvalue { float, float } %[[TMP_THEN_RESULT]], float %[[RESULT_IMAG]], 1 +// LLVM-IMPROVED: br label %[[PHI_RESULT:.*]] +// LLVM-IMPROVED: [[ABS_BR_LT_ABS_BI]]: +// LLVM-IMPROVED: %[[DIV_BR_BI:.*]] = fdiv float %[[B_REAL]], %[[B_IMAG]] +// LLVM-IMPROVED: %[[MUL_DIV_BRBI_BR:.*]] = fmul float %[[DIV_BR_BI]], %[[B_REAL]] +// LLVM-IMPROVED: %[[ADD_BI_MUL_DIV_BRBI_BR:.*]] = fadd float %[[B_IMAG]], %[[MUL_DIV_BRBI_BR]] +// LLVM-IMPROVED: %[[MUL_AR_DIV_BRBI:.*]] = fmul float %[[A_REAL]], %[[DIV_BR_BI]] +// LLVM-IMPROVED: %[[ADD_MUL_AR_DIV_BRBI_AI:.*]] = fadd float %[[MUL_AR_DIV_BRBI]], %[[A_IMAG]] +// LLVM-IMPROVED: %[[RESULT_REAL:.*]] = fdiv float %[[ADD_MUL_AR_DIV_BRBI_AI]], %[[ADD_BI_MUL_DIV_BRBI_BR]] +// LLVM-IMPROVED: %[[MUL_AI_DIV_BRBI:.*]] = fmul float %[[A_IMAG]], %[[DIV_BR_BI]] +// LLVM-IMPROVED: %[[SUB_MUL_AI_DIV_BRBI_AR:.*]] = fsub float %[[MUL_AI_DIV_BRBI]], %[[A_REAL]] +// LLVM-IMPROVED: %[[RESULT_IMAG:.*]] = fdiv float %[[SUB_MUL_AI_DIV_BRBI_AR]], %[[ADD_BI_MUL_DIV_BRBI_BR]] +// LLVM-IMPROVED: %[[TMP_ELSE_RESULT:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL]], 0 +// LLVM-IMPROVED: %[[ELSE_RESULT:.*]] = insertvalue { float, float } %[[TMP_ELSE_RESULT]], float %[[RESULT_IMAG]], 1 +// LLVM-IMPROVED: br label %[[PHI_RESULT]] +// LLVM-IMPROVED: [[PHI_RESULT]]: +// LLVM-IMPROVED: %[[RESULT:.*]] = phi { float, float } [ %[[ELSE_RESULT]], %[[ABS_BR_LT_ABS_BI]] ], [ %[[THEN_RESULT]], %[[ABS_BR_GT_ABS_BI]] ] +// LLVM-IMPROVED: br label %[[STORE_RESULT:.*]] +// LLVM-IMPROVED: [[STORE_RESULT]]: +// LLVM-IMPROVED: store { float, float } %[[RESULT]], ptr %[[C_ADDR]], align 4 + +// OGCG-IMPROVED: %[[A_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-IMPROVED: %[[B_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-IMPROVED: %[[C_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-IMPROVED: %a.realp = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG-IMPROVED: %a.real = load float, ptr %a.realp, align 4 +// OGCG-IMPROVED: %a.imagp = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG-IMPROVED: %a.imag = load float, ptr %a.imagp, align 4 +// OGCG-IMPROVED: %b.realp = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0 +// OGCG-IMPROVED: %b.real = load float, ptr %b.realp, align 4 +// OGCG-IMPROVED: %b.imagp = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1 +// OGCG-IMPROVED: %b.imag = load float, ptr %b.imagp, align 4 +// OGCG-IMPROVED: %[[ABS_B_REAL:.*]] = call float @llvm.fabs.f32(float %[[B_REAL]]) +// OGCG-IMPROVED: %[[ABS_B_IMAG:.*]] = call float @llvm.fabs.f32(float %[[B_IMAG]]) +// OGCG-IMPROVED: %[[ABS_B_CMP:.*]] = fcmp ugt float %[[ABS_B_REAL]], %[[ABS_B_IMAG]] +// OGCG-IMPROVED: br i1 %[[ABS_B_CMP]], label %[[ABS_BR_GT_ABS_BI:.*]], label %[[ABS_BR_LT_ABS_BI:.*]] +// OGCG-IMPROVED: [[ABS_BR_GT_ABS_BI]]: +// OGCG-IMPROVED: %[[DIV_BI_BR:.*]] = fdiv float %[[B_IMAG]], %[[B_REAL]] +// OGCG-IMPROVED: %[[MUL_DIV_BIBR_BI:.*]] = fmul float %[[DIV_BI_BR]], %[[B_IMAG]] +// OGCG-IMPROVED: %[[ADD_BR_MUL_DIV_BIBR_BI:.*]] = fadd float %[[B_REAL]], %[[MUL_DIV_BIBR_BI]] +// OGCG-IMPROVED: %[[MUL_AI_DIV_BIBR:.*]] = fmul float %[[A_IMAG]], %[[DIV_BI_BR]] +// OGCG-IMPROVED: %[[ADD_AR_MUL_AI_DIV_BIBR:.*]] = fadd float %[[A_REAL]], %[[MUL_AI_DIV_BIBR]] +// OGCG-IMPROVED: %[[THEN_RESULT_REAL:.*]] = fdiv float %[[ADD_AR_MUL_AI_DIV_BIBR]], %[[ADD_BR_MUL_DIV_BIBR_BI]] +// OGCG-IMPROVED: %[[MUL_AR_DIV_BI_BR:.*]] = fmul float %[[A_REAL]], %[[DIV_BI_BR]] +// OGCG-IMPROVED: %[[SUB_AI_MUL_AR_DIV_BIBR:.*]] = fsub float %[[A_IMAG]], %[[MUL_AR_DIV_BI_BR]] +// OGCG-IMPROVED: %[[THEN_RESULT_IMAG:.*]] = fdiv float %[[SUB_AI_MUL_AR_DIV_BIBR]], %[[ADD_BR_MUL_DIV_BIBR_BI]] +// OGCG-IMPROVED: br label %[[STORE_RESULT:.*]] +// OGCG-IMPROVED: [[ABS_BR_LT_ABS_BI]]: +// OGCG-IMPROVED: %[[DIV_BR_BI:.*]] = fdiv float %[[B_REAL]], %[[B_IMAG]] +// OGCG-IMPROVED: %[[MUL_DIV_BRBI_BR:.*]] = fmul float %[[DIV_BR_BI]], %[[B_REAL]] +// OGCG-IMPROVED: %[[ADD_BI_MUL_DIV_BRBI_BR:.*]] = fadd float %[[B_IMAG]], %[[MUL_DIV_BRBI_BR]] +// OGCG-IMPROVED: %[[MUL_AR_DIV_BRBI:.*]] = fmul float %[[A_REAL]], %[[DIV_BR_BI]] +// OGCG-IMPROVED: %[[ADD_MUL_AR_DIV_BRBI_AI:.*]] = fadd float %[[MUL_AR_DIV_BRBI]], %[[A_IMAG]] +// OGCG-IMPROVED: %[[ELSE_RESULT_REAL:.*]] = fdiv float %[[ADD_MUL_AR_DIV_BRBI_AI]], %[[ADD_BI_MUL_DIV_BRBI_BR]] +// OGCG-IMPROVED: %[[MUL_AI_DIV_BRBI:.*]] = fmul float %[[A_IMAG]], %[[DIV_BR_BI]] +// OGCG-IMPROVED: %[[SUB_MUL_AI_DIV_BRBI_AR:.*]] = fsub float %[[MUL_AI_DIV_BRBI]], %[[A_REAL]] +// OGCG-IMPROVED: %[[ELSE_RESULT_IMAG:.*]] = fdiv float %[[SUB_MUL_AI_DIV_BRBI_AR]], %[[ADD_BI_MUL_DIV_BRBI_BR]] +// OGCG-IMPROVED: br label %[[STORE_RESULT]] +// OGCG-IMPROVED: [[STORE_RESULT]]: +// OGCG-IMPROVED: %[[RESULT_REAL:.*]] = phi float [ %[[THEN_RESULT_REAL]], %[[ABS_BR_GT_ABS_BI]] ], [ %[[ELSE_RESULT_REAL]], %[[ABS_BR_LT_ABS_BI]] ] +// OGCG-IMPROVED: %[[RESULT_IMAG:.*]] = phi float [ %[[THEN_RESULT_IMAG]], %[[ABS_BR_GT_ABS_BI]] ], [ %[[ELSE_RESULT_IMAG]], %[[ABS_BR_LT_ABS_BI]] ] +// OGCG-IMPROVED: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0 +// OGCG-IMPROVED: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1 +// OGCG-IMPROVED: store float %[[RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4 +// OGCG-IMPROVED: store float %[[RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4 + +// CIR-BEFORE-PROMOTED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(promoted) : !cir.complex<!cir.double> + +// LLVM-PROMOTED: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-PROMOTED: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-PROMOTED: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-PROMOTED: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4 +// LLVM-PROMOTED: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0 +// LLVM-PROMOTED: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1 +// LLVM-PROMOTED: %[[A_REAL_F64:.*]] = fpext float %[[A_REAL]] to double +// LLVM-PROMOTED: %[[A_IMAG_F64:.*]] = fpext float %[[A_IMAG]] to double +// LLVM-PROMOTED: %[[TMP_A_CF64:.*]] = insertvalue { double, double } {{.*}}, double %[[A_REAL_F64]], 0 +// LLVM-PROMOTED: %[[A_CF64:.*]] = insertvalue { double, double } %[[TMP_A_CF64]], double %[[A_IMAG_F64]], 1 +// LLVM-PROMOTED: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4 +// LLVM-PROMOTED: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0 +// LLVM-PROMOTED: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1 +// LLVM-PROMOTED: %[[B_REAL_F64:.*]] = fpext float %[[B_REAL]] to double +// LLVM-PROMOTED: %[[B_IMAG_F64:.*]] = fpext float %[[B_IMAG]] to double +// LLVM-PROMOTED: %[[TMP_B_CF64:.*]] = insertvalue { double, double } {{.*}}, double %[[B_REAL_F64]], 0 +// LLVM-PROMOTED: %[[B_CF64:.*]] = insertvalue { double, double } %[[TMP_B_CF64]], double %[[B_IMAG_F64]], 1 +// LLVM-PROMOTED: %[[MUL_AR_BR:.*]] = fmul double %[[A_REAL_F64]], %[[B_REAL_F64]] +// LLVM-PROMOTED: %[[MUL_AI_BI:.*]] = fmul double %[[A_IMAG_F64]], %[[B_IMAG_F64]] +// LLVM-PROMOTED: %[[MUL_BR_BR:.*]] = fmul double %[[B_REAL_F64]], %[[B_REAL_F64]] +// LLVM-PROMOTED: %[[MUL_BI_BI:.*]] = fmul double %[[B_IMAG_F64]], %[[B_IMAG_F64]] +// LLVM-PROMOTED: %[[ADD_ARBR_AIBI:.*]] = fadd double %[[MUL_AR_BR]], %[[MUL_AI_BI]] +// LLVM-PROMOTED: %[[ADD_BRBR_BIBI:.*]] = fadd double %[[MUL_BR_BR]], %[[MUL_BI_BI]] +// LLVM-PROMOTED: %[[RESULT_REAL:.*]] = fdiv double %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]] +// LLVM-PROMOTED: %[[MUL_AI_BR:.*]] = fmul double %[[A_IMAG_F64]], %[[B_REAL_F64]] +// LLVM-PROMOTED: %[[MUL_AR_BI:.*]] = fmul double %[[A_REAL_F64]], %[[B_IMAG_F64]] +// LLVM-PROMOTED: %[[SUB_AIBR_ARBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BI]] +// LLVM-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_ARBI]], %23 +// LLVM-PROMOTED: %[[TMP_RESULT_CF64:.*]] = insertvalue { double, double } {{.*}}, double %[[RESULT_REAL]], 0 +// LLVM-PROMOTED: %[[RESULT_CF64:.*]] = insertvalue { double, double } %[[TMP_RESULT_CF64]], double %[[RESULT_IMAG]], 1 +// LLVM-PROMOTED: %[[RESULT_REAL_F32:.*]] = fptrunc double %[[RESULT_REAL]] to float +// LLVM-PROMOTED: %[[RESULT_IMAG_F32:.*]] = fptrunc double %[[RESULT_IMAG]] to float +// LLVM-PROMOTED: %[[TMP_RESULT_CF32:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL_F32]], 0 +// LLVM-PROMOTED: %[[RESULT_CF32:.*]] = insertvalue { float, float } %[[TMP_RESULT_CF32]], float %[[RESULT_IMAG_F32]], 1 +// LLVM-PROMOTED: store { float, float } %[[RESULT_CF32]], ptr %[[C_ADDR]], align 4 + +// OGCG-PROMOTED: %[[A_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-PROMOTED: %[[B_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-PROMOTED: %[[C_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-PROMOTED: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG-PROMOTED: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4 +// OGCG-PROMOTED: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG-PROMOTED: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4 +// OGCG-PROMOTED: %[[A_REAL_F64:.*]] = fpext float %[[A_REAL]] to double +// OGCG-PROMOTED: %[[A_IMAG_F64:.*]] = fpext float %[[A_IMAG]] to double +// OGCG-PROMOTED: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0 +// OGCG-PROMOTED: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4 +// OGCG-PROMOTED: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1 +// OGCG-PROMOTED: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4 +// OGCG-PROMOTED: %[[B_REAL_F64:.*]] = fpext float %[[B_REAL]] to double +// OGCG-PROMOTED: %[[B_IMAG_F64:.*]] = fpext float %[[B_IMAG]] to double +// OGCG-PROMOTED: %[[MUL_AR_BR:.*]] = fmul double %[[A_REAL_F64]], %[[B_REAL_F64]] +// OGCG-PROMOTED: %[[MUL_AI_BI:.*]] = fmul double %[[A_IMAG_F64]], %[[B_IMAG_F64]] +// OGCG-PROMOTED: %[[ADD_ARBR_AIBI:.*]] = fadd double %[[MUL_AR_BR]], %[[MUL_AI_BI]] +// OGCG-PROMOTED: %[[MUL_BR_BR:.*]] = fmul double %[[B_REAL_F64]], %[[B_REAL_F64]] +// OGCG-PROMOTED: %[[MUL_BI_BI:.*]] = fmul double %[[B_IMAG_F64]], %[[B_IMAG_F64]] +// OGCG-PROMOTED: %[[ADD_BRBR_BIBI:.*]] = fadd double %[[MUL_BR_BR]], %[[MUL_BI_BI]] +// OGCG-PROMOTED: %[[MUL_AI_BR:.*]] = fmul double %[[A_IMAG_F64]], %[[B_REAL_F64]] +// OGCG-PROMOTED: %[[MUL_AR_BI:.*]] = fmul double %[[A_REAL_F64]], %[[B_IMAG_F64]] +// OGCG-PROMOTED: %[[SUB_AIBR_BRBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BI]] +// OGCG-PROMOTED: %[[RESULT_REAL:.*]] = fdiv double %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]] +// OGCG-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_BRBI]], %[[ADD_BRBR_BIBI]] +// OGCG-PROMOTED: %[[UNPROMOTION_RESULT_REAL:.*]] = fptrunc double %[[RESULT_REAL]] to float +// OGCG-PROMOTED: %[[UNPROMOTION_RESULT_IMAG:.*]] = fptrunc double %[[RESULT_IMAG]] to float +// OGCG-PROMOTED: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0 +// OGCG-PROMOTED: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1 +// OGCG-PROMOTED: store float %[[UNPROMOTION_RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4 +// OGCG-PROMOTED: store float %[[UNPROMOTION_RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4 + +// CIR-BEFORE-FULL: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(full) : !cir.complex<!cir.float> + +// LLVM-FULL: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-FULL: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-FULL: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM-FULL: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4 +// LLVM-FULL: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4 +// LLVM-FULL: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0 +// LLVM-FULL: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1 +// LLVM-FULL: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0 +// LLVM-FULL: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1 +// LLVM-FULL: %[[RESULT:.*]] = call { float, float } @__divsc3(float %[[A_REAL]], float %[[A_IMAG]], float %[[B_REAL]], float %[[B_IMAG]]) +// LLVM-FULL: store { float, float } %[[RESULT]], ptr %[[C_ADDR]], align 4 + +// OGCG-FULL: %[[A_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-FULL: %[[B_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-FULL: %[[C_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-FULL: %[[RESULT_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-FULL: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG-FULL: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4 +// OGCG-FULL: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG-FULL: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4 +// OGCG-FULL: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0 +// OGCG-FULL: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4 +// OGCG-FULL: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1 +// OGCG-FULL: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4 +// OGCG-FULL: %[[RESULT:.*]] = call noundef <2 x float> @__divsc3(float noundef %[[A_REAL]], float noundef %[[A_IMAG]], float noundef %[[B_REAL]], float noundef %[[B_IMAG]]) #2 +// OGCG-FULL: store <2 x float> %[[RESULT]], ptr %[[RESULT_ADDR]], align 4 +// OGCG-FULL: %[[RESULT_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT_ADDR]], i32 0, i32 0 +// OGCG-FULL: %[[RESULT_REAL:.*]] = load float, ptr %[[RESULT_REAL_PTR]], align 4 +// OGCG-FULL: %[[RESULT_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT_ADDR]], i32 0, i32 1 +// OGCG-FULL: %[[RESULT_IMAG:.*]] = load float, ptr %[[RESULT_IMAG_PTR]], align 4 +// OGCG-FULL: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0 +// OGCG-FULL: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1 +// OGCG-FULL: store float %[[RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4 +// OGCG-FULL: store float %[[RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4 + +void foo4() { + int _Complex a; + int _Complex b; + int _Complex c = a / b; +} + +// CIR-BEFORE-BASIC: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(basic) : !cir.complex<!s32i> + +// CIR-BEFORE-IMPROVED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(improved) : !cir.complex<!s32i> + +// CIR-BEFORE-PROMOTED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(promoted) : !cir.complex<!s32i> + +// CIR-BEFORE-FULL: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(full) : !cir.complex<!s32i> + +// LLVM-COMBINED: %[[A_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM-COMBINED: %[[B_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM-COMBINED: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM-COMBINED: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[A_ADDR]], align 4 +// LLVM-COMBINED: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4 +// LLVM-COMBINED: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 0 +// LLVM-COMBINED: %[[A_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 1 +// LLVM-COMBINED: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 0 +// LLVM-COMBINED: %[[B_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 1 +// LLVM-COMBINED: %[[MUL_AR_BR:.*]] = mul i32 %[[A_REAL]], %[[B_REAL]] +// LLVM-COMBINED: %[[MUL_AI_BI:.*]] = mul i32 %[[A_IMAG]], %[[B_IMAG]] +// LLVM-COMBINED: %[[MUL_BR_BR:.*]] = mul i32 %[[B_REAL]], %[[B_REAL]] +// LLVM-COMBINED: %[[MUL_BI_BI:.*]] = mul i32 %[[B_IMAG]], %[[B_IMAG]] +// LLVM-COMBINED: %[[ADD_ARBR_AIBI:.*]] = add i32 %[[MUL_AR_BR]], %[[MUL_AI_BI]] +// LLVM-COMBINED: %[[ADD_BRBR_BIBI:.*]] = add i32 %[[MUL_BR_BR]], %[[MUL_BI_BI]] +// LLVM-COMBINED: %[[RESULT_REAL:.*]] = sdiv i32 %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]] +// LLVM-COMBINED: %[[MUL_AI_BR:.*]] = mul i32 %[[A_IMAG]], %[[B_REAL]] +// LLVM-COMBINED: %[[MUL_BR_BI:.*]] = mul i32 %[[A_REAL]], %[[B_IMAG]] +// LLVM-COMBINED: %[[SUB_AIBR_BRBI:.*]] = sub i32 %[[MUL_AI_BR]], %[[MUL_BR_BI]] +// LLVM-COMBINED: %[[RESULT_IMAG:.*]] = sdiv i32 %[[SUB_AIBR_BRBI]], %[[ADD_BRBR_BIBI]] +// LLVM-COMBINED: %[[TMP_RESULT:.*]] = insertvalue { i32, i32 } {{.*}}, i32 %[[RESULT_REAL]], 0 +// LLVM-COMBINED: %[[RESULT:.*]] = insertvalue { i32, i32 } %[[TMP_RESULT]], i32 %[[RESULT_IMAG]], 1 +// LLVM-COMBINED: store { i32, i32 } %[[RESULT]], ptr %[[C_ADDR]], align 4 + +// OGCG-COMBINED: %[[A_ADDR:.*]] = alloca { i32, i32 }, align 4 +// OGCG-COMBINED: %[[B_ADDR:.*]] = alloca { i32, i32 }, align 4 +// OGCG-COMBINED: %[[C_ADDR:.*]] = alloca { i32, i32 }, align 4 +// OGCG-COMBINED: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG-COMBINED: %[[A_REAL:.*]] = load i32, ptr %[[A_REAL_PTR]], align 4 +// OGCG-COMBINED: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG-COMBINED: %[[A_IMAG:.*]] = load i32, ptr %[[A_IMAG_PTR]], align 4 +// OGCG-COMBINED: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0 +// OGCG-COMBINED: %[[B_REAL:.*]] = load i32, ptr %[[B_REAL_PTR]], align 4 +// OGCG-COMBINED: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1 +// OGCG-COMBINED: %[[B_IMAG:.*]] = load i32, ptr %[[B_IMAG_PTR]], align 4 +// OGCG-COMBINED: %[[MUL_AR_BR:.*]] = mul i32 %[[A_REAL]], %[[B_REAL]] +// OGCG-COMBINED: %[[MUL_AI_BI:.*]] = mul i32 %[[A_IMAG]], %[[B_IMAG]] +// OGCG-COMBINED: %[[ADD_ARBR_AIBI:.*]] = add i32 %[[MUL_AR_BR]], %[[MUL_AI_BI]] +// OGCG-COMBINED: %[[MUL_BR_BR:.*]] = mul i32 %[[B_REAL]], %[[B_REAL]] +// OGCG-COMBINED: %[[MUL_BI_BI:.*]] = mul i32 %[[B_IMAG]], %[[B_IMAG]] +// OGCG-COMBINED: %[[ADD_BRBR_BIBI:.*]] = add i32 %[[MUL_BR_BR]], %[[MUL_BI_BI]] +// OGCG-COMBINED: %[[MUL_AI_BR:.*]] = mul i32 %[[A_IMAG]], %[[B_REAL]] +// OGCG-COMBINED: %[[MUL_AR_BI:.*]] = mul i32 %[[A_REAL]], %[[B_IMAG]] +// OGCG-COMBINED: %[[SUB_AIBR_BRBI:.*]] = sub i32 %[[MUL_AI_BR]], %[[MUL_AR_BI]] +// OGCG-COMBINED: %[[RESULT_REAL:.*]] = sdiv i32 %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]] +// OGCG-COMBINED: %[[RESULT_IMAG:.*]] = sdiv i32 %[[SUB_AIBR_BRBI]], %[[ADD_BRBR_BIBI]] +// OGCG-COMBINED: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 0 +// OGCG-COMBINED: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 1 +// OGCG-COMBINED: store i32 %[[RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4 +// OGCG-COMBINED: store i32 %[[RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4 >From 6879b7063c08515beeb589410939e51127a3a94f Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Sat, 16 Aug 2025 15:34:11 +0200 Subject: [PATCH 2/6] Address code review comments --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 29 +++-- clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp | 38 +----- .../Dialect/Transforms/LoweringPrepare.cpp | 123 ++++++++++++++++-- clang/test/CIR/CodeGen/complex-mul-div.cpp | 32 ++--- 4 files changed, 145 insertions(+), 77 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 4ccc5b1f24a5d..54775e35b6f88 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2985,8 +2985,8 @@ def CIR_ComplexMulOp : CIR_Op<"complex.mul", [ The `cir.complex.mul` operation takes two complex numbers and returns their product. - Range is used to select the implementation used when the operation - is lowered to the LLVM dialect. For multiplication, 'improved', + The `range` attribute is used to select the algorithm used when the + operation is lowered to the LLVM dialect. For multiplication, 'improved', 'promoted', and 'basic' are all handled equivalently, producing the algebraic formula with no special handling for NaN value. If 'full' is used, a runtime-library function is called if one of the intermediate @@ -3019,15 +3019,19 @@ def CIR_ComplexDivOp : CIR_Op<"complex.div", [ let summary = "Complex division"; let description = [{ The `cir.complex.div` operation takes two complex numbers and returns - their division. - - Range is used to select the implementation used when the operation - is lowered to the LLVM dialect. For division, 'improved' and - 'promoted' are all handled equivalently, producing the - Smith's algorithms for Complex division. If 'full' is used, - a runtime-library function is called if one of the intermediate - calculations produced a NaN value, and for 'basic' algebraic formula with - no special handling for NaN value will be used. + their quotient. + + The `range` attribute is used to select the algorithm used when + the operation is lowered to the LLVM dialect. For division, 'improved' + producing the Smith's algorithms for Complex division with no special + handling for NaN values. If 'promoted' is used, the values are promoted + to a higher precision type, if possible, and the calculation is performed + using the algebraic formula. We only fall back on Smith's algorithm when + the target does not support a higher precision type. Also, this only + applies to floating-point types with no special handling for NaN values. + If 'full' is used, a runtime-library function is called if one of the + intermediate calculations produced a NaN value. and for 'basic' algebraic + formula with no special handling for the NaN value will be used. Example: @@ -3040,8 +3044,7 @@ def CIR_ComplexDivOp : CIR_Op<"complex.div", [ let arguments = (ins CIR_ComplexType:$lhs, CIR_ComplexType:$rhs, - CIR_ComplexRangeKind:$range, - UnitAttr:$promoted + CIR_ComplexRangeKind:$range ); let results = (outs CIR_ComplexType:$result); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index b1afab398d7f4..cd65f4180f82a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -10,7 +10,6 @@ namespace { class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> { CIRGenFunction &cgf; CIRGenBuilderTy &builder; - bool fpHasBeenPromoted = false; public: explicit ComplexExprEmitter(CIRGenFunction &cgf) @@ -131,43 +130,9 @@ class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> { mlir::Value emitBinMul(const BinOpInfo &op); mlir::Value emitBinDiv(const BinOpInfo &op); - QualType higherPrecisionTypeForComplexArithmetic(QualType elementType, - bool isDivOpCode) { - ASTContext &astContext = cgf.getContext(); - const QualType higherElementType = - astContext.GetHigherPrecisionFPType(elementType); - const llvm::fltSemantics &elementTypeSemantics = - astContext.getFloatTypeSemantics(elementType); - const llvm::fltSemantics &higherElementTypeSemantics = - astContext.getFloatTypeSemantics(higherElementType); - - // Check that the promoted type can handle the intermediate values without - // overflowing. This can be interpreted as: - // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal) * 2 <= - // LargerType.LargestFiniteVal. - // In terms of exponent it gives this formula: - // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal - // doubles the exponent of SmallerType.LargestFiniteVal) - if (llvm::APFloat::semanticsMaxExponent(elementTypeSemantics) * 2 + 1 <= - llvm::APFloat::semanticsMaxExponent(higherElementTypeSemantics)) { - fpHasBeenPromoted = true; - return astContext.getComplexType(higherElementType); - } - - // The intermediate values can't be represented in the promoted type - // without overflowing. - return QualType(); - } - QualType getPromotionType(QualType ty, bool isDivOpCode = false) { if (auto *complexTy = ty->getAs<ComplexType>()) { QualType elementTy = complexTy->getElementType(); - if (isDivOpCode && elementTy->isFloatingType() && - cgf.getLangOpts().getComplexRange() == - LangOptions::ComplexRangeKind::CX_Promoted) { - return higherPrecisionTypeForComplexArithmetic(elementTy, isDivOpCode); - } - if (elementTy.UseExcessPrecision(cgf.getContext())) return cgf.getContext().getComplexType(cgf.getContext().FloatTy); } @@ -896,8 +861,7 @@ mlir::Value ComplexExprEmitter::emitBinDiv(const BinOpInfo &op) { mlir::isa<cir::ComplexType>(op.rhs.getType())) { cir::ComplexRangeKind rangeKind = getComplexRangeAttr(op.fpFeatures.getComplexRange()); - return builder.create<cir::ComplexDivOp>(op.loc, op.lhs, op.rhs, rangeKind, - fpHasBeenPromoted); + return builder.create<cir::ComplexDivOp>(op.loc, op.lhs, op.rhs, rangeKind); } cgf.cgm.errorNYI("ComplexExprEmitter::emitBinMu between Complex & Scalar"); diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 676b6bfbdb456..c15637d297cd1 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -8,6 +8,7 @@ #include "PassDetail.h" #include "clang/AST/ASTContext.h" +#include "clang/Basic/TargetInfo.h" #include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" @@ -312,22 +313,125 @@ buildRangeReductionComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, return ternary.getResult(); } -static mlir::Value lowerComplexDiv(LoweringPreparePass &pass, - CIRBaseBuilderTy &builder, - mlir::Location loc, cir::ComplexDivOp op, - mlir::Value lhsReal, mlir::Value lhsImag, - mlir::Value rhsReal, mlir::Value rhsImag) { +static mlir::Type higherPrecisionElementTypeForComplexArithmetic( + mlir::MLIRContext &context, clang::ASTContext &cc, + CIRBaseBuilderTy &builder, mlir::Type elementType) { + + auto getHigherPrecisionFPType = [&context](mlir::Type type) -> mlir::Type { + if (mlir::isa<cir::FP16Type>(type)) + return cir::SingleType::get(&context); + + if (mlir::isa<cir::SingleType>(type) || mlir::isa<cir::BF16Type>(type)) + return cir::DoubleType::get(&context); + + if (mlir::isa<cir::DoubleType>(type)) + return cir::LongDoubleType::get(&context, type); + + return type; + }; + + auto getFloatTypeSemantics = + [&cc](mlir::Type type) -> const llvm::fltSemantics & { + const clang::TargetInfo &info = cc.getTargetInfo(); + if (mlir::isa<cir::FP16Type>(type)) + return info.getHalfFormat(); + + if (mlir::isa<cir::BF16Type>(type)) + return info.getBFloat16Format(); + + if (mlir::isa<cir::SingleType>(type)) + return info.getFloatFormat(); + + if (mlir::isa<cir::DoubleType>(type)) + return info.getDoubleFormat(); + + if (mlir::isa<cir::LongDoubleType>(type)) { + if (cc.getLangOpts().OpenMP && cc.getLangOpts().OpenMPIsTargetDevice) + llvm_unreachable("NYI Float type semantics with OpenMP"); + return info.getLongDoubleFormat(); + } + + if (mlir::isa<cir::FP128Type>(type)) { + if (cc.getLangOpts().OpenMP && cc.getLangOpts().OpenMPIsTargetDevice) + llvm_unreachable("NYI Float type semantics with OpenMP"); + return info.getFloat128Format(); + } + + assert(false && "Unsupported float type semantics"); + }; + + const mlir::Type higherElementType = getHigherPrecisionFPType(elementType); + const llvm::fltSemantics &elementTypeSemantics = + getFloatTypeSemantics(elementType); + const llvm::fltSemantics &higherElementTypeSemantics = + getFloatTypeSemantics(higherElementType); + + // Check that the promoted type can handle the intermediate values without + // overflowing. This can be interpreted as: + // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal) * 2 <= + // LargerType.LargestFiniteVal. + // In terms of exponent it gives this formula: + // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal + // doubles the exponent of SmallerType.LargestFiniteVal) + if (llvm::APFloat::semanticsMaxExponent(elementTypeSemantics) * 2 + 1 <= + llvm::APFloat::semanticsMaxExponent(higherElementTypeSemantics)) { + return higherElementType; + } + + // The intermediate values can't be represented in the promoted type + // without overflowing. + return {}; +} + +static mlir::Value +lowerComplexDiv(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, + mlir::Location loc, cir::ComplexDivOp op, mlir::Value lhsReal, + mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag, + mlir::MLIRContext &mlirCx, clang::ASTContext &cc) { cir::ComplexType complexTy = op.getType(); if (mlir::isa<cir::FPTypeInterface>(complexTy.getElementType())) { cir::ComplexRangeKind range = op.getRange(); - if (range == cir::ComplexRangeKind::Improved || - (range == cir::ComplexRangeKind::Promoted && !op.getPromoted())) + if (range == cir::ComplexRangeKind::Improved) return buildRangeReductionComplexDiv(builder, loc, lhsReal, lhsImag, rhsReal, rhsImag); + if (range == cir::ComplexRangeKind::Full) return buildComplexBinOpLibCall(pass, builder, &getComplexDivLibCallName, loc, complexTy, lhsReal, lhsImag, rhsReal, rhsImag); + + if (range == cir::ComplexRangeKind::Promoted) { + mlir::Type originalElementType = complexTy.getElementType(); + mlir::Type higherPrecisionElementType = + higherPrecisionElementTypeForComplexArithmetic(mlirCx, cc, builder, + originalElementType); + + if (!higherPrecisionElementType) + return buildRangeReductionComplexDiv(builder, loc, lhsReal, lhsImag, + rhsReal, rhsImag); + + cir::CastKind floatingCastKind = cir::CastKind::floating; + lhsReal = builder.createCast(floatingCastKind, lhsReal, + higherPrecisionElementType); + lhsImag = builder.createCast(floatingCastKind, lhsImag, + higherPrecisionElementType); + rhsReal = builder.createCast(floatingCastKind, rhsReal, + higherPrecisionElementType); + rhsImag = builder.createCast(floatingCastKind, rhsImag, + higherPrecisionElementType); + + mlir::Value algebraicResult = buildAlgebraicComplexDiv( + builder, loc, lhsReal, lhsImag, rhsReal, rhsImag); + + mlir::Value resultReal = builder.createComplexReal(loc, algebraicResult); + mlir::Value resultImag = builder.createComplexImag(loc, algebraicResult); + + mlir::Value finalReal = + builder.createCast(floatingCastKind, resultReal, originalElementType); + mlir::Value finalImag = + builder.createCast(floatingCastKind, resultImag, originalElementType); + return builder.createComplexCreate(loc, finalReal, finalImag); + } } return buildAlgebraicComplexDiv(builder, loc, lhsReal, lhsImag, rhsReal, @@ -345,8 +449,9 @@ void LoweringPreparePass::lowerComplexDivOp(cir::ComplexDivOp op) { mlir::Value rhsReal = builder.createComplexReal(loc, rhs); mlir::Value rhsImag = builder.createComplexImag(loc, rhs); - mlir::Value loweredResult = lowerComplexDiv(*this, builder, loc, op, lhsReal, - lhsImag, rhsReal, rhsImag); + mlir::Value loweredResult = + lowerComplexDiv(*this, builder, loc, op, lhsReal, lhsImag, rhsReal, + rhsImag, getContext(), *astCtx); op.replaceAllUsesWith(loweredResult); op.erase(); } diff --git a/clang/test/CIR/CodeGen/complex-mul-div.cpp b/clang/test/CIR/CodeGen/complex-mul-div.cpp index aa44b72e6aaa3..e11dc1f489e76 100644 --- a/clang/test/CIR/CodeGen/complex-mul-div.cpp +++ b/clang/test/CIR/CodeGen/complex-mul-div.cpp @@ -476,25 +476,21 @@ void foo3() { // OGCG-IMPROVED: store float %[[RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4 // OGCG-IMPROVED: store float %[[RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4 -// CIR-BEFORE-PROMOTED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(promoted) : !cir.complex<!cir.double> +// CIR-BEFORE-PROMOTED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(promoted) : !cir.complex<!cir.float> // LLVM-PROMOTED: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-PROMOTED: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-PROMOTED: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-PROMOTED: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4 +// LLVM-PROMOTED: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4 // LLVM-PROMOTED: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0 // LLVM-PROMOTED: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1 -// LLVM-PROMOTED: %[[A_REAL_F64:.*]] = fpext float %[[A_REAL]] to double -// LLVM-PROMOTED: %[[A_IMAG_F64:.*]] = fpext float %[[A_IMAG]] to double -// LLVM-PROMOTED: %[[TMP_A_CF64:.*]] = insertvalue { double, double } {{.*}}, double %[[A_REAL_F64]], 0 -// LLVM-PROMOTED: %[[A_CF64:.*]] = insertvalue { double, double } %[[TMP_A_CF64]], double %[[A_IMAG_F64]], 1 -// LLVM-PROMOTED: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4 // LLVM-PROMOTED: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0 // LLVM-PROMOTED: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1 +// LLVM-PROMOTED: %[[A_REAL_F64:.*]] = fpext float %[[A_REAL]] to double +// LLVM-PROMOTED: %[[A_IMAG_F64:.*]] = fpext float %[[A_IMAG]] to double // LLVM-PROMOTED: %[[B_REAL_F64:.*]] = fpext float %[[B_REAL]] to double // LLVM-PROMOTED: %[[B_IMAG_F64:.*]] = fpext float %[[B_IMAG]] to double -// LLVM-PROMOTED: %[[TMP_B_CF64:.*]] = insertvalue { double, double } {{.*}}, double %[[B_REAL_F64]], 0 -// LLVM-PROMOTED: %[[B_CF64:.*]] = insertvalue { double, double } %[[TMP_B_CF64]], double %[[B_IMAG_F64]], 1 // LLVM-PROMOTED: %[[MUL_AR_BR:.*]] = fmul double %[[A_REAL_F64]], %[[B_REAL_F64]] // LLVM-PROMOTED: %[[MUL_AI_BI:.*]] = fmul double %[[A_IMAG_F64]], %[[B_IMAG_F64]] // LLVM-PROMOTED: %[[MUL_BR_BR:.*]] = fmul double %[[B_REAL_F64]], %[[B_REAL_F64]] @@ -503,16 +499,16 @@ void foo3() { // LLVM-PROMOTED: %[[ADD_BRBR_BIBI:.*]] = fadd double %[[MUL_BR_BR]], %[[MUL_BI_BI]] // LLVM-PROMOTED: %[[RESULT_REAL:.*]] = fdiv double %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]] // LLVM-PROMOTED: %[[MUL_AI_BR:.*]] = fmul double %[[A_IMAG_F64]], %[[B_REAL_F64]] -// LLVM-PROMOTED: %[[MUL_AR_BI:.*]] = fmul double %[[A_REAL_F64]], %[[B_IMAG_F64]] -// LLVM-PROMOTED: %[[SUB_AIBR_ARBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BI]] -// LLVM-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_ARBI]], %23 -// LLVM-PROMOTED: %[[TMP_RESULT_CF64:.*]] = insertvalue { double, double } {{.*}}, double %[[RESULT_REAL]], 0 -// LLVM-PROMOTED: %[[RESULT_CF64:.*]] = insertvalue { double, double } %[[TMP_RESULT_CF64]], double %[[RESULT_IMAG]], 1 +// LLVM-PROMOTED: %[[MUL_AR_BR:.*]] = fmul double %[[A_REAL_F64]], %[[B_IMAG_F64]] +// LLVM-PROMOTED: %[[SUB_AIBR_ARBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BR]] +// LLVM-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_ARBI]], %[[ADD_BRBR_BIBI]] +// LLVM-PROMOTED: %[[TMP_RESULT_F64:.*]] = insertvalue { double, double } {{.*}}, double %[[RESULT_REAL]], 0 +// LLVM-PROMOTED: %[[RESULT_F64:.*]] = insertvalue { double, double } %[[TMP_RESULT_F64]], double %[[RESULT_IMAG]], 1 // LLVM-PROMOTED: %[[RESULT_REAL_F32:.*]] = fptrunc double %[[RESULT_REAL]] to float // LLVM-PROMOTED: %[[RESULT_IMAG_F32:.*]] = fptrunc double %[[RESULT_IMAG]] to float -// LLVM-PROMOTED: %[[TMP_RESULT_CF32:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL_F32]], 0 -// LLVM-PROMOTED: %[[RESULT_CF32:.*]] = insertvalue { float, float } %[[TMP_RESULT_CF32]], float %[[RESULT_IMAG_F32]], 1 -// LLVM-PROMOTED: store { float, float } %[[RESULT_CF32]], ptr %[[C_ADDR]], align 4 +// LLVM-PROMOTED: %[[TMP_RESULT_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL_F32]], 0 +// LLVM-PROMOTED: %[[RESULT_F32:.*]] = insertvalue { float, float } %[[TMP_RESULT_F32]], float %[[RESULT_IMAG_F32]], 1 +// LLVM-PROMOTED: store { float, float } %[[RESULT_F32]], ptr %[[C_ADDR]], align 4 // OGCG-PROMOTED: %[[A_ADDR:.*]] = alloca { float, float }, align 4 // OGCG-PROMOTED: %[[B_ADDR:.*]] = alloca { float, float }, align 4 @@ -537,9 +533,9 @@ void foo3() { // OGCG-PROMOTED: %[[ADD_BRBR_BIBI:.*]] = fadd double %[[MUL_BR_BR]], %[[MUL_BI_BI]] // OGCG-PROMOTED: %[[MUL_AI_BR:.*]] = fmul double %[[A_IMAG_F64]], %[[B_REAL_F64]] // OGCG-PROMOTED: %[[MUL_AR_BI:.*]] = fmul double %[[A_REAL_F64]], %[[B_IMAG_F64]] -// OGCG-PROMOTED: %[[SUB_AIBR_BRBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BI]] +// OGCG-PROMOTED: %[[SUB_AIBR_ARBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BI]] // OGCG-PROMOTED: %[[RESULT_REAL:.*]] = fdiv double %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]] -// OGCG-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_BRBI]], %[[ADD_BRBR_BIBI]] +// OGCG-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_ARBI]], %[[ADD_BRBR_BIBI]] // OGCG-PROMOTED: %[[UNPROMOTION_RESULT_REAL:.*]] = fptrunc double %[[RESULT_REAL]] to float // OGCG-PROMOTED: %[[UNPROMOTION_RESULT_IMAG:.*]] = fptrunc double %[[RESULT_IMAG]] to float // OGCG-PROMOTED: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0 >From fc62a606fb25ad7081ba9ed783cacd9cff102b9c Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Sat, 16 Aug 2025 18:10:35 +0200 Subject: [PATCH 3/6] Address code review comments --- clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index cd65f4180f82a..2492b0115dbe1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -861,10 +861,11 @@ mlir::Value ComplexExprEmitter::emitBinDiv(const BinOpInfo &op) { mlir::isa<cir::ComplexType>(op.rhs.getType())) { cir::ComplexRangeKind rangeKind = getComplexRangeAttr(op.fpFeatures.getComplexRange()); - return builder.create<cir::ComplexDivOp>(op.loc, op.lhs, op.rhs, rangeKind); + return cir::ComplexDivOp::create(builder, op.loc, op.lhs, op.rhs, + rangeKind); } - cgf.cgm.errorNYI("ComplexExprEmitter::emitBinMu between Complex & Scalar"); + cgf.cgm.errorNYI("ComplexExprEmitter::emitBinDiv between Complex & Scalar"); return {}; } >From 511115f0975a627513061a73ab1ba1a8ee18b5d7 Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Thu, 21 Aug 2025 12:34:08 +0200 Subject: [PATCH 4/6] Add tests for ComplexDiv after LoweringPrepare pass --- clang/test/CIR/CodeGen/complex-mul-div.cpp | 141 ++++++++++++++++++++- 1 file changed, 135 insertions(+), 6 deletions(-) diff --git a/clang/test/CIR/CodeGen/complex-mul-div.cpp b/clang/test/CIR/CodeGen/complex-mul-div.cpp index e11dc1f489e76..0fb82b24f6640 100644 --- a/clang/test/CIR/CodeGen/complex-mul-div.cpp +++ b/clang/test/CIR/CodeGen/complex-mul-div.cpp @@ -1,7 +1,7 @@ // complex-range basic // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=basic -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-BASIC %s // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=basic -Wno-unused-value -fclangir -emit-cir %s -o %t.cir -// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED,CIR-COMBINED +// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED,CIR-COMBINED,CIR-AFTER-BASIC // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=basic -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll // RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED,LLVM-COMBINED,LLVM-BASIC // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=basic -Wno-unused-value -emit-llvm %s -o %t.ll @@ -10,7 +10,7 @@ // complex-range improved // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=improved -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-IMPROVED %s // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=improved -Wno-unused-value -fclangir -emit-cir %s -o %t.cir -// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED,CIR-COMBINED +// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED,CIR-COMBINED,CIR-AFTER-IMPROVED // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=improved -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll // RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED,LLVM-COMBINED,LLVM-IMPROVED // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=improved -Wno-unused-value -emit-llvm %s -o %t.ll @@ -19,7 +19,7 @@ // complex-range promoted // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=promoted -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-PROMOTED %s // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=promoted -Wno-unused-value -fclangir -emit-cir %s -o %t.cir -// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED,CIR-COMBINED +// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED,CIR-COMBINED,CIR-AFTER-PROMOTED // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=promoted -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll // RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED,LLVM-COMBINED,LLVM-PROMOTED // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=promoted -Wno-unused-value -emit-llvm %s -o %t.ll @@ -333,6 +333,29 @@ void foo3() { // CIR-BEFORE-BASIC: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(basic) : !cir.complex<!cir.float> +// CIR-AFTER-BASIC: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"] +// CIR-AFTER-BASIC: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"] +// CIR-AFTER-BASIC: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c", init] +// CIR-AFTER-BASIC: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR-AFTER-BASIC: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR-AFTER-BASIC: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-BASIC: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-BASIC: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-BASIC: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-BASIC: %[[MUL_AR_BR:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_REAL]]) : !cir.float +// CIR-AFTER-BASIC: %[[MUL_AI_BI:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_IMAG]]) : !cir.float +// CIR-AFTER-BASIC: %[[MUL_BR_BR:.*]] = cir.binop(mul, %[[B_REAL]], %[[B_REAL]]) : !cir.float +// CIR-AFTER-BASIC: %[[MUL_BI_BI:.*]] = cir.binop(mul, %[[B_IMAG]], %[[B_IMAG]]) : !cir.float +// CIR-AFTER-BASIC: %[[ADD_ARBR_AIBI:.*]] = cir.binop(add, %[[MUL_AR_BR]], %[[MUL_AI_BI]]) : !cir.float +// CIR-AFTER-BASIC: %[[ADD_BRBR_BIBI:.*]] = cir.binop(add, %[[MUL_BR_BR]], %[[MUL_BI_BI]]) : !cir.float +// CIR-AFTER-BASIC: %[[RESULT_REAL:.*]] = cir.binop(div, %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]]) : !cir.float +// CIR-AFTER-BASIC: %[[MUL_AI_BR:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_REAL]]) : !cir.float +// CIR-AFTER-BASIC: %[[MUL_AR_BI:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_IMAG]]) : !cir.float +// CIR-AFTER-BASIC: %[[SUB_AIBR_ARBI:.*]] = cir.binop(sub, %[[MUL_AI_BR]], %[[MUL_AR_BI]]) : !cir.float +// CIR-AFTER-BASIC: %[[RESULT_IMAG:.*]] = cir.binop(div, %[[SUB_AIBR_ARBI]], %14) : !cir.float +// CIR-AFTER-BASIC: %[[RESULT:.*]] = cir.complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : !cir.float -> !cir.complex<!cir.float> +// CIR-AFTER-BASIC: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> + // LLVM-BASIC: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-BASIC: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-BASIC: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 @@ -386,6 +409,45 @@ void foo3() { // CIR-BEFORE-IMPROVED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(improved) : !cir.complex<!cir.float> +// CIR-AFTER-IMPROVED: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"] +// CIR-AFTER-IMPROVED: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"] +// CIR-AFTER-IMPROVED: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c", init] +// CIR-AFTER-IMPROVED: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR-AFTER-IMPROVED: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR-AFTER-IMPROVED: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-IMPROVED: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-IMPROVED: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-IMPROVED: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-IMPROVED: %[[ABS_B_REAL:.*]] = cir.fabs %[[B_REAL]] : !cir.float +// CIR-AFTER-IMPROVED: %[[ABS_B_IMAG:.*]] = cir.fabs %[[B_IMAG]] : !cir.float +// CIR-AFTER-IMPROVED: %[[ABS_B_CMP:.*]] = cir.cmp(ge, %[[ABS_B_REAL]], %[[ABS_B_IMAG]]) : !cir.float, !cir.bool +// CIR-AFTER-IMPROVED: %[[RESULT:.*]] = cir.ternary(%[[ABS_B_CMP]], true { +// CIR-AFTER-IMPROVED: %[[DIV_BI_BR:.*]] = cir.binop(div, %[[B_IMAG]], %[[B_REAL]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[MUL_DIV_BIBR_BI:.*]] = cir.binop(mul, %[[DIV_BI_BR]], %[[B_IMAG]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[ADD_BR_MUL_DIV_BIBR_BI:.*]] = cir.binop(add, %[[B_REAL]], %[[MUL_DIV_BIBR_BI]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[MUL_AI_DIV_BIBR:.*]] = cir.binop(mul, %[[A_IMAG]], %[[DIV_BI_BR]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[ADD_AR_MUL_AI_DIV_BIBR:.*]] = cir.binop(add, %[[A_REAL]], %[[MUL_AI_DIV_BIBR]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[RESULT_REAL:.*]] = cir.binop(div, %[[ADD_AR_MUL_AI_DIV_BIBR]], %[[ADD_BR_MUL_DIV_BIBR_BI]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[MUL_AR_DIV_BIBR:.*]] = cir.binop(mul, %[[A_REAL]], %[[DIV_BI_BR]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[SUB_AI_MUL_AR_DIV_BIBR:.*]] = cir.binop(sub, %[[A_IMAG]], %[[MUL_AR_DIV_BIBR]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[RESULT_IMAG:.*]] = cir.binop(div, %[[SUB_AI_MUL_AR_DIV_BIBR]], %[[ADD_BR_MUL_DIV_BIBR_BI]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[RESULT_COMPLEX:.*]] = cir.complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : !cir.float -> !cir.complex<!cir.float> +// CIR-AFTER-IMPROVED: cir.yield %[[RESULT_COMPLEX]] : !cir.complex<!cir.float> +// CIR-AFTER-IMPROVED: }, false { +// CIR-AFTER-IMPROVED: %[[DIV_BR_BI:.*]] = cir.binop(div, %[[B_REAL]], %[[B_IMAG]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[MUL_DIV_BRBI_BR:.*]] = cir.binop(mul, %[[DIV_BR_BI]], %[[B_REAL]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[ADD_BI_MUL_DIV_BRBI_BR:.*]] = cir.binop(add, %[[B_IMAG]], %[[MUL_DIV_BRBI_BR]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[MUL_AR_DIV_BIBR:.*]] = cir.binop(mul, %[[A_REAL]], %[[DIV_BR_BI]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[ADD_MUL_AR_DIV_BRBI_AI:.*]] = cir.binop(add, %[[MUL_AR_DIV_BIBR]], %[[A_IMAG]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[RESULT_REAL:.*]] = cir.binop(div, %[[ADD_MUL_AR_DIV_BRBI_AI]], %[[ADD_BI_MUL_DIV_BRBI_BR]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[MUL_AI_DIV_BRBI:.*]] = cir.binop(mul, %[[A_IMAG]], %[[DIV_BR_BI]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[SUB_MUL_AI_DIV_BRBI_AR:.*]] = cir.binop(sub, %[[MUL_AI_DIV_BRBI]], %[[A_REAL]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[RESULT_IMAG:.*]] = cir.binop(div, %[[SUB_MUL_AI_DIV_BRBI_AR]], %[[ADD_BI_MUL_DIV_BRBI_BR]]) : !cir.float +// CIR-AFTER-IMPROVED: %[[RESULT_COMPLEX:.*]] = cir.complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : !cir.float -> !cir.complex<!cir.float> +// CIR-AFTER-IMPROVED: cir.yield %[[RESULT_COMPLEX]] : !cir.complex<!cir.float> +// CIR-AFTER-IMPROVED: }) : (!cir.bool) -> !cir.complex<!cir.float> +// CIR-AFTER-IMPROVED: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> + // LLVM-IMPROVED: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-IMPROVED: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-IMPROVED: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 @@ -406,8 +468,8 @@ void foo3() { // LLVM-IMPROVED: %[[MUL_AI_DIV_BIBR:.*]] = fmul float %[[A_IMAG]], %[[DIV_BI_BR]] // LLVM-IMPROVED: %[[ADD_AR_MUL_AI_DIV_BIBR:.*]] = fadd float %[[A_REAL]], %[[MUL_AI_DIV_BIBR]] // LLVM-IMPROVED: %[[RESULT_REAL:.*]] = fdiv float %[[ADD_AR_MUL_AI_DIV_BIBR]], %16 -// LLVM-IMPROVED: %[[MUL_AR_DIV_BI_BR:.*]] = fmul float %[[A_REAL]], %[[DIV_BI_BR]] -// LLVM-IMPROVED: %[[SUB_AI_MUL_AR_DIV_BIBR:.*]] = fsub float %[[A_IMAG]], %[[MUL_AR_DIV_BI_BR]] +// LLVM-IMPROVED: %[[MUL_AR_DIV_BIBR:.*]] = fmul float %[[A_REAL]], %[[DIV_BI_BR]] +// LLVM-IMPROVED: %[[SUB_AI_MUL_AR_DIV_BIBR:.*]] = fsub float %[[A_IMAG]], %[[MUL_AR_DIV_BIBR]] // LLVM-IMPROVED: %[[RESULT_IMAG:.*]] = fdiv float %[[SUB_AI_MUL_AR_DIV_BIBR]], %[[ADD_BR_MUL_DIV_BIBR_BI]] // LLVM-IMPROVED: %[[TMP_THEN_RESULT:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL]], 0 // LLVM-IMPROVED: %[[THEN_RESULT:.*]] = insertvalue { float, float } %[[TMP_THEN_RESULT]], float %[[RESULT_IMAG]], 1 @@ -478,6 +540,38 @@ void foo3() { // CIR-BEFORE-PROMOTED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(promoted) : !cir.complex<!cir.float> +// CIR-AFTER-PROMOTED: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"] +// CIR-AFTER-PROMOTED: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"] +// CIR-AFTER-PROMOTED: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c", init] +// CIR-AFTER-PROMOTED: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR-AFTER-PROMOTED: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR-AFTER-PROMOTED: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-PROMOTED: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-PROMOTED: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-PROMOTED: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-PROMOTED: %[[A_REAL_F64:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.float), !cir.double +// CIR-AFTER-PROMOTED: %[[A_IMAG_F64:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.float), !cir.double +// CIR-AFTER-PROMOTED: %[[B_REAL_F64:.*]] = cir.cast(floating, %[[B_REAL]] : !cir.float), !cir.double +// CIR-AFTER-PROMOTED: %[[B_IMAG_F64:.*]] = cir.cast(floating, %[[B_IMAG]] : !cir.float), !cir.double +// CIR-AFTER-PROMOTED: %[[MUL_AR_BR:.*]] = cir.binop(mul, %[[A_REAL_F64]], %[[B_REAL_F64]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[MUL_AI_BI:.*]] = cir.binop(mul, %[[A_IMAG_F64]], %[[B_IMAG_F64]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[MUL_BR_BR:.*]] = cir.binop(mul, %[[B_REAL_F64]], %[[B_REAL_F64]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[MUL_BI_BI:.*]] = cir.binop(mul, %[[B_IMAG_F64]], %[[B_IMAG_F64]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[ADD_ARBR_AIBI:.*]] = cir.binop(add, %[[MUL_AR_BR]], %[[MUL_AI_BI]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[ADD_BRBR_BIBI:.*]] = cir.binop(add, %[[MUL_BR_BR]], %[[MUL_BI_BI]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[RESULT_REAL:.*]] = cir.binop(div, %[[ADD_ARBR_AIBI]], %18) : !cir.double +// CIR-AFTER-PROMOTED: %[[MUL_AI_BR:.*]] = cir.binop(mul, %[[A_IMAG_F64]], %[[B_REAL_F64]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[MUL_AR_BI:.*]] = cir.binop(mul, %[[A_REAL_F64]], %[[B_IMAG_F64]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[SUB_AIBR_ARBI:.*]] = cir.binop(sub, %[[MUL_AI_BR]], %[[MUL_AR_BI]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[RESULT_IMAG:.*]] = cir.binop(div, %[[SUB_AIBR_ARBI]], %[[ADD_BRBR_BIBI]]) : !cir.double +// CIR-AFTER-PROMOTED: %[[RESULT_F64:.*]] = cir.complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : !cir.double -> !cir.complex<!cir.double> +// CIR-AFTER-PROMOTED: %[[RESULT_REAL_F64:.*]] = cir.complex.real %[[RESULT_F64]] : !cir.complex<!cir.double> -> !cir.double +// CIR-AFTER-PROMOTED: %[[RESULT_IMAG_F64:.*]] = cir.complex.imag %[[RESULT_F64]] : !cir.complex<!cir.double> -> !cir.double +// CIR-AFTER-PROMOTED: %[[RESULT_REAL_F32:.*]] = cir.cast(floating, %[[RESULT_REAL_F64]] : !cir.double), !cir.float +// CIR-AFTER-PROMOTED: %[[RESULT_IMAG_F32:.*]] = cir.cast(floating, %[[RESULT_IMAG_F64]] : !cir.double), !cir.float +// CIR-AFTER-PROMOTED: %[[RESULT_F32:.*]] = cir.complex.create %[[RESULT_REAL_F32]], %[[RESULT_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float> +// CIR-AFTER-PROMOTED: cir.store{{.*}} %[[RESULT_F32]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> + // LLVM-PROMOTED: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-PROMOTED: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-PROMOTED: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 @@ -510,7 +604,7 @@ void foo3() { // LLVM-PROMOTED: %[[RESULT_F32:.*]] = insertvalue { float, float } %[[TMP_RESULT_F32]], float %[[RESULT_IMAG_F32]], 1 // LLVM-PROMOTED: store { float, float } %[[RESULT_F32]], ptr %[[C_ADDR]], align 4 -// OGCG-PROMOTED: %[[A_ADDR:.*]] = alloca { float, float }, align 4 +// OGCG-PROMOTED: %[[A_ADDR:.*]] = alloca { float, float }, align 4 // OGCG-PROMOTED: %[[B_ADDR:.*]] = alloca { float, float }, align 4 // OGCG-PROMOTED: %[[C_ADDR:.*]] = alloca { float, float }, align 4 // OGCG-PROMOTED: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0 @@ -545,6 +639,18 @@ void foo3() { // CIR-BEFORE-FULL: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(full) : !cir.complex<!cir.float> +// CIR-AFTER-FULL: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"] +// CIR-AFTER-FULL: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"] +// CIR-AFTER-FULL: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c", init] +// CIR-AFTER-FULL: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR-AFTER-FULL: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float> +// CIR-AFTER-FULL: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-FULL: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-FULL: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-FULL: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float +// CIR-AFTER-FULL: %[[RESULT:.*]] = cir.call @__divsc3(%[[A_REAL]], %[[A_IMAG]], %[[B_REAL]], %[[B_IMAG]]) : (!cir.float, !cir.float, !cir.float, !cir.float) -> !cir.complex<!cir.float> +// CIR-AFTER-FULL: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> + // LLVM-FULL: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-FULL: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4 // LLVM-FULL: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4 @@ -594,6 +700,29 @@ void foo4() { // CIR-BEFORE-FULL: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(full) : !cir.complex<!s32i> +// CIR-COMBINED: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"] +// CIR-COMBINED: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b"] +// CIR-COMBINED: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["c", init] +// CIR-COMBINED: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i> +// CIR-COMBINED: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i> +// CIR-COMBINED: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!s32i> -> !s32i +// CIR-COMBINED: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!s32i> -> !s32i +// CIR-COMBINED: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!s32i> -> !s32i +// CIR-COMBINED: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!s32i> -> !s32i +// CIR-COMBINED: %[[MUL_AR_BR:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_REAL]]) : !s32i +// CIR-COMBINED: %[[MUL_AI_BI:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_IMAG]]) : !s32i +// CIR-COMBINED: %[[MUL_BR_BR:.*]] = cir.binop(mul, %[[B_REAL]], %[[B_REAL]]) : !s32i +// CIR-COMBINED: %[[MUL_BI_BI:.*]] = cir.binop(mul, %[[B_IMAG]], %[[B_IMAG]]) : !s32i +// CIR-COMBINED: %[[ADD_ARBR_AIBI:.*]] = cir.binop(add, %[[MUL_AR_BR]], %[[MUL_AI_BI]]) : !s32i +// CIR-COMBINED: %[[ADD_BRBR_BIBI:.*]] = cir.binop(add, %[[MUL_BR_BR]], %[[MUL_BI_BI]]) : !s32i +// CIR-COMBINED: %[[RESULT_REAL:.*]] = cir.binop(div, %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]]) : !s32i +// CIR-COMBINED: %[[MUL_AI_BR:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_REAL]]) : !s32i +// CIR-COMBINED: %[[MUL_AR_BI:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_IMAG]]) : !s32i +// CIR-COMBINED: %[[SUB_AIBR_ARBI:.*]] = cir.binop(sub, %[[MUL_AI_BR]], %[[MUL_AR_BI]]) : !s32i +// CIR-COMBINED: %[[RESULT_IMAG:.*]] = cir.binop(div, %[[SUB_AIBR_ARBI]], %14) : !s32i +// CIR-COMBINED: %[[RESULT:.*]] = cir.complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : !s32i -> !cir.complex<!s32i> +// CIR-COMBINED: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>> + // LLVM-COMBINED: %[[A_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 // LLVM-COMBINED: %[[B_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 // LLVM-COMBINED: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 >From 674541094b71f765a80a0ad3a361b7cadeba4457 Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Fri, 22 Aug 2025 20:44:10 +0200 Subject: [PATCH 5/6] Update description of ComplexDiv Op and allign MulOp --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 29 ++++++++++---------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 54775e35b6f88..1585bf4d59398 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2985,12 +2985,13 @@ def CIR_ComplexMulOp : CIR_Op<"complex.mul", [ The `cir.complex.mul` operation takes two complex numbers and returns their product. - The `range` attribute is used to select the algorithm used when the - operation is lowered to the LLVM dialect. For multiplication, 'improved', - 'promoted', and 'basic' are all handled equivalently, producing the - algebraic formula with no special handling for NaN value. If 'full' is - used, a runtime-library function is called if one of the intermediate - calculations produced a NaN value. + For complex types with floating-point components, the `range` attribute + specifies the algorithm to be used when the operation is lowered to + the LLVM dialect. For multiplication, 'improved', 'promoted', and 'basic' + are all handled equivalently, producing the algebraic formula with no + special handling for NaN value. If 'full' is used, a runtime-library + function is called if one of the intermediate calculations produced + a NaN value. Example: @@ -3021,14 +3022,14 @@ def CIR_ComplexDivOp : CIR_Op<"complex.div", [ The `cir.complex.div` operation takes two complex numbers and returns their quotient. - The `range` attribute is used to select the algorithm used when - the operation is lowered to the LLVM dialect. For division, 'improved' - producing the Smith's algorithms for Complex division with no special - handling for NaN values. If 'promoted' is used, the values are promoted - to a higher precision type, if possible, and the calculation is performed - using the algebraic formula. We only fall back on Smith's algorithm when - the target does not support a higher precision type. Also, this only - applies to floating-point types with no special handling for NaN values. + For complex types with floating-point components, the `range` attribute + specifies the algorithm to be used when the operation is lowered to + the LLVM dialect. For division, 'improved' produces Smith's algorithms for + Complex division with no additional handling for NaN values. If 'promoted' + is used, the values are promoted to a higher precision type, if possible, + and the calculation is performed using the algebraic formula, with + no additional handling for NaN values. We fall back on Smith's algorithm + when the target does not support a higher precision type. If 'full' is used, a runtime-library function is called if one of the intermediate calculations produced a NaN value. and for 'basic' algebraic formula with no special handling for the NaN value will be used. >From a9a9fcaf8799f917861d8707826d1a5b7a9f3b90 Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Fri, 22 Aug 2025 22:27:41 +0200 Subject: [PATCH 6/6] Fix format and add note for ints --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 1585bf4d59398..d65e26497ed06 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -3029,10 +3029,11 @@ def CIR_ComplexDivOp : CIR_Op<"complex.div", [ is used, the values are promoted to a higher precision type, if possible, and the calculation is performed using the algebraic formula, with no additional handling for NaN values. We fall back on Smith's algorithm - when the target does not support a higher precision type. - If 'full' is used, a runtime-library function is called if one of the - intermediate calculations produced a NaN value. and for 'basic' algebraic - formula with no special handling for the NaN value will be used. + when the target doesn't support a higher precision type. If 'full' is used, + a runtime-library function is called if one of the intermediate + calculations produced a NaN value. and for 'basic' algebraic formula with + no additional handling for the NaN value will be used. For integers types + `range` attribute will be ignored. Example: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits