https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/169310
>From 627bcb3bde64a780ed2b9aaaa9267d97c9679f9c Mon Sep 17 00:00:00 2001 From: Priyanshu3820 <[email protected]> Date: Wed, 26 Nov 2025 17:45:00 +0530 Subject: [PATCH 1/2] Add CIR sqrt builtin support for X86 --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 344 +++++++++++++++++- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 66 +++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 323 +++++++++++++++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 14 + .../CIR/CodeGen/X86/cir-sqrtps-builtins.c | 46 +++ 5 files changed, 772 insertions(+), 21 deletions(-) create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index e612d6a0ba886..291b035e6204c 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -802,8 +802,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [ //===----------------------------------------------------------------------===// defvar CIR_YieldableScopes = [ - "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp", - "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp" + "ArrayCtor", "ArrayDtor", "AwaitOp", "CaseOp", "DoWhileOp", "ForOp", + "GlobalOp", "IfOp", "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp" ]; def CIR_YieldOp : CIR_Op<"yield", [ @@ -1640,6 +1640,82 @@ def CIR_CmpOp : CIR_Op<"cmp", [Pure, SameTypeOperands]> { let isLLVMLoweringRecursive = true; } +//===----------------------------------------------------------------------===// +// BinOpOverflowOp +//===----------------------------------------------------------------------===// + +def CIR_BinOpOverflowKind : CIR_I32EnumAttr< + "BinOpOverflowKind", "checked binary arithmetic operation kind", [ + I32EnumAttrCase<"Add", 0, "add">, + I32EnumAttrCase<"Sub", 1, "sub">, + I32EnumAttrCase<"Mul", 2, "mul"> +]>; + +def CIR_BinOpOverflowOp : CIR_Op<"binop.overflow", [Pure, SameTypeOperands]> { + let 
summary = "Perform binary integral arithmetic with overflow checking"; + let description = [{ + `cir.binop.overflow` performs binary arithmetic operations with overflow + checking on integral operands. + + The `kind` argument specifies the kind of arithmetic operation to perform. + It can be either `add`, `sub`, or `mul`. The `lhs` and `rhs` arguments + specify the input operands of the arithmetic operation. The types of `lhs` + and `rhs` must be the same. + + `cir.binop.overflow` produces two SSA values. `result` is the result of the + arithmetic operation truncated to its specified type. `overflow` is a + boolean value indicating whether overflow happens during the operation. + + The exact semantic of this operation is as follows: + + - `lhs` and `rhs` are promoted to an imaginary integral type that has + infinite precision. + - The arithmetic operation is performed on the promoted operands. + - The infinite-precision result is truncated to the type of `result`. The + truncated result is assigned to `result`. + - If the truncated result is equal to the un-truncated result, `overflow` + is assigned to false. Otherwise, `overflow` is assigned to true. 
+ }]; + + let arguments = (ins + CIR_BinOpOverflowKind:$kind, + CIR_IntType:$lhs, + CIR_IntType:$rhs + ); + + let results = (outs CIR_IntType:$result, CIR_BoolType:$overflow); + + let assemblyFormat = [{ + `(` $kind `,` $lhs `,` $rhs `)` `:` qualified(type($lhs)) `,` + `(` qualified(type($result)) `,` qualified(type($overflow)) `)` + attr-dict + }]; + + let builders = [ + OpBuilder<(ins "cir::IntType":$resultTy, + "cir::BinOpOverflowKind":$kind, + "mlir::Value":$lhs, + "mlir::Value":$rhs), [{ + auto overflowTy = cir::BoolType::get($_builder.getContext()); + build($_builder, $_state, resultTy, overflowTy, kind, lhs, rhs); + }]> + ]; + + let extraLLVMLoweringPatternDecl = [{ + static std::string getLLVMIntrinName(cir::BinOpOverflowKind opKind, + bool isSigned, unsigned width); + + struct EncompassedTypeInfo { + bool sign; + unsigned width; + }; + + static EncompassedTypeInfo computeEncompassedTypeWidth(cir::IntType operandTy, + cir::IntType resultTy); + }]; +} + + //===----------------------------------------------------------------------===// // BinOp //===----------------------------------------------------------------------===// @@ -2533,7 +2609,9 @@ def CIR_FuncOp : CIR_Op<"func", [ OptionalAttr<DictArrayAttr>:$res_attrs, OptionalAttr<FlatSymbolRefAttr>:$aliasee, CIR_OptionalPriorityAttr:$global_ctor_priority, - CIR_OptionalPriorityAttr:$global_dtor_priority); + CIR_OptionalPriorityAttr:$global_dtor_priority, + OptionalAttr<CIR_CXXSpecialMemberAttr>:$cxx_special_member + ); let regions = (region AnyRegion:$body); @@ -2572,7 +2650,32 @@ def CIR_FuncOp : CIR_Op<"func", [ //===------------------------------------------------------------------===// bool isDeclaration(); - }]; + + //===------------------------------------------------------------------===// + // C++ Special Member Functions + //===------------------------------------------------------------------===// + + /// Returns true if this function is a C++ special member function. 
+ bool isCXXSpecialMemberFunction(); + + bool isCxxConstructor(); + bool isCxxDestructor(); + + /// Returns true if this function is a copy or move assignment operator. + bool isCxxSpecialAssignment(); + + /// Returns the kind of constructor this function represents, if any. + std::optional<CtorKind> getCxxConstructorKind(); + + /// Returns the kind of assignment operator (move, copy) this function + /// represents, if any. + std::optional<AssignKind> getCxxSpecialAssignKind(); + + /// Returns true if the function is a trivial C++ member function such as + /// trivial default constructor, copy/move constructor, copy/move assignment, + /// or destructor. + bool isCxxTrivialMemberFunction(); +}]; let hasCustomAssemblyFormat = 1; let hasVerifier = 1; @@ -2752,6 +2855,100 @@ def CIR_CallOp : CIR_CallOpBase<"call", [NoRegionArguments]> { ]; } +//===----------------------------------------------------------------------===// +// AwaitOp +//===----------------------------------------------------------------------===// + +def CIR_AwaitKind : CIR_I32EnumAttr<"AwaitKind", "await kind", [ + I32EnumAttrCase<"Init", 0, "init">, + I32EnumAttrCase<"User", 1, "user">, + I32EnumAttrCase<"Yield", 2, "yield">, + I32EnumAttrCase<"Final", 3, "final"> +]>; + +def CIR_AwaitOp : CIR_Op<"await",[ + DeclareOpInterfaceMethods<RegionBranchOpInterface>, + RecursivelySpeculatable, NoRegionArguments +]> { + let summary = "Wraps C++ co_await implicit logic"; + let description = [{ + The under the hood effect of using C++ `co_await expr` roughly + translates to: + + ```c++ + // co_await expr; + + auto &&x = CommonExpr(); + if (!x.await_ready()) { + ... + x.await_suspend(...); + ... 
+ } + x.await_resume(); + ``` + + `cir.await` represents this logic by using 3 regions: + - ready: covers veto power from x.await_ready() + - suspend: wraps actual x.await_suspend() logic + - resume: handles x.await_resume() + + Breaking this up in regions allows individual scrutiny of conditions + which might lead to folding some of them out. Lowerings coming out + of CIR, e.g. LLVM, should use the `suspend` region to track more + lower level codegen (e.g. intrinsic emission for coro.save/coro.suspend). + + There are also 4 flavors of `cir.await` available: + - `init`: compiler generated initial suspend via implicit `co_await`. + - `user`: also known as normal, representing a user written `co_await`. + - `yield`: user written `co_yield` expressions. + - `final`: compiler generated final suspend via implicit `co_await`. + + ```mlir + cir.scope { + ... // auto &&x = CommonExpr(); + cir.await(user, ready : { + ... // x.await_ready() + }, suspend : { + ... // x.await_suspend() + }, resume : { + ... // x.await_resume() + }) + } + ``` + + Note that resolution of the common expression is assumed to happen + as part of the enclosing await scope. 
+ }]; + + let arguments = (ins CIR_AwaitKind:$kind); + let regions = (region SizedRegion<1>:$ready, + SizedRegion<1>:$suspend, + SizedRegion<1>:$resume); + let assemblyFormat = [{ + `(` $kind `,` + `ready` `:` $ready `,` + `suspend` `:` $suspend `,` + `resume` `:` $resume `,` + `)` + attr-dict + }]; + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins + "cir::AwaitKind":$kind, + CArg<"BuilderCallbackRef", + "nullptr">:$readyBuilder, + CArg<"BuilderCallbackRef", + "nullptr">:$suspendBuilder, + CArg<"BuilderCallbackRef", + "nullptr">:$resumeBuilder + )> + ]; + + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // CopyOp //===----------------------------------------------------------------------===// @@ -2988,6 +3185,39 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> { let hasCustomAssemblyFormat = 1; } +//===----------------------------------------------------------------------===// +// SqrtOp +//===----------------------------------------------------------------------===// + +def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> { + let summary = "Floating-point square root"; + + let description = [{ + The `cir.sqrt` operation computes the element-wise square root of its input. + + The input must be either: + • a floating-point scalar type, or + • a vector whose element type is floating-point. + + The result type must match the input type exactly. 
+ + Examples: + // scalar + %r = cir.sqrt %x : !cir.fp64 + + // vector + %v = cir.sqrt %vec : !cir.vector<!cir.fp32 x 4> + }]; + + // input and output types: float or vector-of-float + let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input); + let results = (outs CIR_AnyFloatOrVecOfFloatType:$result); + + let assemblyFormat = [{ + $input `:` type($input) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // UnreachableOp //===----------------------------------------------------------------------===// @@ -4018,6 +4248,72 @@ def CIR_RotateOp : CIR_Op<"rotate", [Pure, SameOperandsAndResultType]> { let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// FPClass Test Flags +//===----------------------------------------------------------------------===// + +def FPClassTestEnum : CIR_I32EnumAttr<"FPClassTest", "floating-point class test flags", [ + // Basic flags + I32EnumAttrCase<"SignalingNaN", 1, "fcSNan">, + I32EnumAttrCase<"QuietNaN", 2, "fcQNan">, + I32EnumAttrCase<"NegativeInfinity", 4, "fcNegInf">, + I32EnumAttrCase<"NegativeNormal", 8, "fcNegNormal">, + I32EnumAttrCase<"NegativeSubnormal", 16, "fcNegSubnormal">, + I32EnumAttrCase<"NegativeZero", 32, "fcNegZero">, + I32EnumAttrCase<"PositiveZero", 64, "fcPosZero">, + I32EnumAttrCase<"PositiveSubnormal", 128, "fcPosSubnormal">, + I32EnumAttrCase<"PositiveNormal", 256, "fcPosNormal">, + I32EnumAttrCase<"PositiveInfinity", 512, "fcPosInf">, + + // Composite flags + I32EnumAttrCase<"Nan", 3, "fcNan">, // fcSNan | fcQNan + I32EnumAttrCase<"Infinity", 516, "fcInf">, // fcPosInf | fcNegInf + I32EnumAttrCase<"Normal", 264, "fcNormal">, // fcPosNormal | fcNegNormal + I32EnumAttrCase<"Subnormal", 144, "fcSubnormal">, // fcPosSubnormal | fcNegSubnormal + I32EnumAttrCase<"Zero", 96, "fcZero">, // fcPosZero | fcNegZero + I32EnumAttrCase<"PositiveFinite", 448, "fcPosFinite">,// fcPosNormal | fcPosSubnormal | fcPosZero + 
I32EnumAttrCase<"NegativeFinite", 56, "fcNegFinite">, // fcNegNormal | fcNegSubnormal | fcNegZero + I32EnumAttrCase<"Finite", 504, "fcFinite">, // fcPosFinite | fcNegFinite + I32EnumAttrCase<"Positive", 960, "fcPositive">, // fcPosFinite | fcPosInf + I32EnumAttrCase<"Negative", 60, "fcNegative">, // fcNegFinite | fcNegInf + I32EnumAttrCase<"All", 1023, "fcAllFlags">, // fcNan | fcInf | fcFinite +]> { + let cppNamespace = "::cir"; +} + +def CIR_IsFPClassOp : CIR_Op<"is_fp_class"> { + let summary = "Corresponding to the `__builtin_fpclassify` builtin function in clang"; + + let description = [{ + The `cir.is_fp_class` operation takes a floating-point value as its first + argument and a bitfield of flags as its second argument. The operation + returns a boolean value indicating whether the floating-point value + satisfies the given flags. + + The flags must be a compile time constant and the values are: + + | Bit # | floating-point class | + | ----- | -------------------- | + | 0 | Signaling NaN | + | 1 | Quiet NaN | + | 2 | Negative infinity | + | 3 | Negative normal | + | 4 | Negative subnormal | + | 5 | Negative zero | + | 6 | Positive zero | + | 7 | Positive subnormal | + | 8 | Positive normal | + | 9 | Positive infinity | + }]; + + let arguments = (ins CIR_AnyFloatType:$src, + FPClassTestEnum:$flags); + let results = (outs CIR_BoolType:$result); + let assemblyFormat = [{ + $src `,` $flags `:` functional-type($src, $result) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // Assume Operations //===----------------------------------------------------------------------===// @@ -4202,7 +4498,7 @@ def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> { When the `min` attribute is present, the operation returns the minimum guaranteed accessible size. When absent (max mode), it returns the maximum possible object size. Corresponds to `llvm.objectsize`'s `min` argument. 
- + The `dynamic` attribute determines if the value should be evaluated at runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument. @@ -4658,6 +4954,44 @@ def CIR_TryOp : CIR_Op<"try",[ let hasLLVMLowering = false; } +//===----------------------------------------------------------------------===// +// Exception related: EhInflightOp +//===----------------------------------------------------------------------===// + +def CIR_EhInflightOp : CIR_Op<"eh.inflight_exception"> { + let summary = "Materialize the catch clause formal parameter"; + let description = [{ + `cir.eh.inflight_exception` returns two values: + - `exception_ptr`: The exception pointer for the inflight exception + - `type_id`: the type info index for the exception type + This operation is expected to be the first operation in the unwind + destination basic blocks of a `cir.try_call` operation. + + The `cleanup` attribute indicates that clean up code must be run before the + values produced by this operation are used to dispatch the exception. This + cleanup code must be executed even if the exception is not caught. + This helps CIR to pass down more accurate information for LLVM lowering + to landingpads. + + Example: + + ```mlir + %exception_ptr, %type_id = cir.eh.inflight_exception + %exception_ptr, %type_id = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc] + %exception_ptr, %type_id = cir.eh.inflight_exception cleanup + ``` + }]; + + let arguments = (ins UnitAttr:$cleanup, + OptionalAttr<FlatSymbolRefArrayAttr>:$catch_type_list); + let results = (outs CIR_VoidPtrType:$exception_ptr, CIR_UInt32:$type_id); + let assemblyFormat = [{ + (`cleanup` $cleanup^)? + ($catch_type_list^)? 
+ attr-dict + }]; +} + //===----------------------------------------------------------------------===// // Atomic operations //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index ee6900141647f..e91a9e4db229a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -121,20 +121,36 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy); case X86::BI_mm_prefetch: case X86::BI__rdtsc: - case X86::BI__builtin_ia32_rdtscp: + case X86::BI__builtin_ia32_rdtscp: { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } case X86::BI__builtin_ia32_lzcnt_u16: case X86::BI__builtin_ia32_lzcnt_u32: - case X86::BI__builtin_ia32_lzcnt_u64: + case X86::BI__builtin_ia32_lzcnt_u64: { + mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc())); + return emitIntrinsicCallOp(*this, expr, "ctlz", ops[0].getType(), + mlir::ValueRange{ops[0], isZeroPoison}); + } case X86::BI__builtin_ia32_tzcnt_u16: case X86::BI__builtin_ia32_tzcnt_u32: - case X86::BI__builtin_ia32_tzcnt_u64: + case X86::BI__builtin_ia32_tzcnt_u64: { + mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc())); + return emitIntrinsicCallOp(*this, expr, "cttz", ops[0].getType(), + mlir::ValueRange{ops[0], isZeroPoison}); + } case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented X86 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return {}; + // The x86 definition of "undef" is not the same as the LLVM definition + // (PR32176). 
We leave optimizing away an unnecessary zero constant to the + // IR optimizer and backend. + // TODO: If we had a "freeze" IR instruction to generate a fixed undef + // value, we should use that here instead of a zero. + return builder.getNullValue(convertType(expr->getType()), + getLoc(expr->getExprLoc())); case X86::BI__builtin_ia32_vec_ext_v4hi: case X86::BI__builtin_ia32_vec_ext_v16qi: case X86::BI__builtin_ia32_vec_ext_v8hi: @@ -169,10 +185,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vec_set_v16hi: case X86::BI__builtin_ia32_vec_set_v8si: case X86::BI__builtin_ia32_vec_set_v4di: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI_mm_setcsr: - case X86::BI__builtin_ia32_ldmxcsr: + case X86::BI__builtin_ia32_ldmxcsr: { + mlir::Location loc = getLoc(expr->getExprLoc()); + Address tmp = createMemTemp(expr->getArg(0)->getType(), loc); + builder.createStore(loc, ops[0], tmp); + return emitIntrinsicCallOp(*this, expr, "x86.sse.ldmxcsr", + builder.getVoidTy(), tmp.getPointer()); + } case X86::BI_mm_getcsr: - case X86::BI__builtin_ia32_stmxcsr: + case X86::BI__builtin_ia32_stmxcsr: { + mlir::Location loc = getLoc(expr->getExprLoc()); + Address tmp = createMemTemp(expr->getType(), loc); + emitIntrinsicCallOp(*this, expr, "x86.sse.stmxcsr", builder.getVoidTy(), + tmp.getPointer()); + return builder.createLoad(loc, tmp); + } case X86::BI__builtin_ia32_xsave: case X86::BI__builtin_ia32_xsave64: case X86::BI__builtin_ia32_xrstor: @@ -681,10 +713,24 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: + errorNYI("masked round sqrt builtins"); + return {}; case X86::BI__builtin_ia32_sqrtpd256: case X86::BI__builtin_ia32_sqrtpd: case 
X86::BI__builtin_ia32_sqrtps256: - case X86::BI__builtin_ia32_sqrtps: + case X86::BI__builtin_ia32_sqrtps: { + mlir::Location loc = getLoc(E->getExprLoc()); + assert(E->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument"); + mlir::Value arg = emitScalarExpr(E->getArg(0)); + mlir::Type argTy = arg.getType(); + if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) { + assert(vecTy.getNumElements() == 4 && + vecTy.getElementType().isa<mlir::FloatType>() && + "__builtin_ia32_sqrtps expects <4 x float> / __m128"); + } + auto sqrt = cir::SqrtOp > ::create(builder, loc, argTy, arg); + return sqrt.getResult(); + } case X86::BI__builtin_ia32_sqrtph256: case X86::BI__builtin_ia32_sqrtph: case X86::BI__builtin_ia32_sqrtph512: diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index d43a462a25092..937c66082ca40 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1,4 +1,4 @@ -//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===// +//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -30,6 +30,7 @@ #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Transforms/DialectConversion.h" +#include "clang/Basic/LLVM.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" @@ -44,6 +45,96 @@ using namespace cir; using namespace llvm; +using namespace mlir; + +static std::string getLLVMIntrinsicNameForType(Type llvmTy) { + std::string s; + { + llvm::raw_string_ostream os(s); + llvm::Type *unused = nullptr; + os << llvmTy; + } + if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) { + } + return s; +} + +// Actual lowering +LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite( + cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const { + + Location loc = op.getLoc(); + MLIRContext *ctx = rewriter.getContext(); + + Type cirResTy = op.getResult().getType(); + Type llvmResTy = getTypeConverter()->convertType(cirResTy); + if (!llvmResTy) + return op.emitOpError( + "expected LLVM dialect result type for cir.sqrt lowering"); + + Value operand = adaptor.getInput(); + Value llvmOperand = operand; + if (operand.getType() != llvmResTy) { + llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand); + } + + // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result + std::string intrinsicName = "llvm.sqrt."; + std::string suffix; + + // If the CIR result type is a vector, include the 'vN' part in the suffix. 
+ if (auto vec = cirResTy.dyn_cast<cir::VectorType>()) { + Type elt = vec.getElementType(); + if (auto f = elt.dyn_cast<cir::FloatType>()) { + unsigned width = f.getWidth(); + unsigned n = vec.getNumElements(); + if (width == 32) + suffix = "v" + std::to_string(n) + "f32"; + else if (width == 64) + suffix = "v" + std::to_string(n) + "f64"; + else if (width == 16) + suffix = "v" + std::to_string(n) + "f16"; + else + return op.emitOpError("unsupported float width for sqrt"); + } else { + return op.emitOpError("vector element must be floating point for sqrt"); + } + } else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) { + // Scalar float + unsigned width = f.getWidth(); + if (width == 32) + suffix = "f32"; + else if (width == 64) + suffix = "f64"; + else if (width == 16) + suffix = "f16"; + else + return op.emitOpError("unsupported float width for sqrt"); + } else { + return op.emitOpError("unsupported type for cir.sqrt lowering"); + } + + intrinsicName += suffix; + + // Ensure the llvm intrinsic function exists at module scope. Insert it at + // the start of the module body using an insertion guard. 
+ ModuleOp module = op->getParentOfType<ModuleOp>(); + if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) { + OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(module.getBody()); + auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmResTy, {llvmResTy}, + /*isVarArg=*/false); + rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType); + } + + // Create the call and replace cir.sqrt + auto callee = SymbolRefAttr::get(ctx, intrinsicName); + rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, llvmResTy, callee, + ArrayRef<Value>{llvmOperand}); + + return mlir::success(); +} namespace cir { namespace direct { @@ -284,7 +375,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow, memoryEffect = mlir::LLVM::MemoryEffectsAttr::get( callOp->getContext(), /*other=*/ModRefInfo::Ref, /*argMem=*/ModRefInfo::Ref, - /*inaccessibleMem=*/ModRefInfo::Ref); + /*inaccessibleMem=*/ModRefInfo::Ref, + /*errnoMem=*/ModRefInfo::Ref, + /*targetMem0=*/ModRefInfo::Ref, + /*targetMem1=*/ModRefInfo::Ref); noUnwind = true; willReturn = true; break; @@ -293,7 +387,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow, memoryEffect = mlir::LLVM::MemoryEffectsAttr::get( callOp->getContext(), /*other=*/ModRefInfo::NoModRef, /*argMem=*/ModRefInfo::NoModRef, - /*inaccessibleMem=*/ModRefInfo::NoModRef); + /*inaccessibleMem=*/ModRefInfo::NoModRef, + /*errnoMem=*/ModRefInfo::NoModRef, + /*targetMem0=*/ModRefInfo::NoModRef, + /*targetMem1=*/ModRefInfo::NoModRef); noUnwind = true; willReturn = true; break; @@ -670,6 +767,18 @@ mlir::LogicalResult CIRToLLVMASinOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMIsFPClassOpLowering::matchAndRewrite( + cir::IsFPClassOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Value src = adaptor.getSrc(); + cir::FPClassTest flags = adaptor.getFlags(); + mlir::IntegerType retTy = rewriter.getI1Type(); + + 
rewriter.replaceOpWithNewOp<mlir::LLVM::IsFPClass>( + op, retTy, src, static_cast<uint32_t>(flags)); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMAssumeOpLowering::matchAndRewrite( cir::AssumeOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -1995,7 +2104,6 @@ void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp( // attributes are available on cir.global ops. This duplicates code // in CIRToLLVMGlobalOpLowering::matchAndRewrite() but that will go // away when the placeholders are no longer needed. - assert(!cir::MissingFeatures::opGlobalConstant()); const bool isConst = op.getConstant(); assert(!cir::MissingFeatures::addressSpace()); const unsigned addrSpace = 0; @@ -2055,8 +2163,7 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType); // FIXME: These default values are placeholders until the the equivalent // attributes are available on cir.global ops. 
- assert(!cir::MissingFeatures::opGlobalConstant()); - const bool isConst = false; + const bool isConst = op.getConstant(); assert(!cir::MissingFeatures::addressSpace()); const unsigned addrSpace = 0; const bool isDsoLocal = op.getDsoLocal(); @@ -2570,6 +2677,120 @@ mlir::LogicalResult CIRToLLVMCmpOpLowering::matchAndRewrite( return cmpOp.emitError() << "unsupported type for CmpOp: " << type; } +mlir::LogicalResult CIRToLLVMBinOpOverflowOpLowering::matchAndRewrite( + cir::BinOpOverflowOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Location loc = op.getLoc(); + cir::BinOpOverflowKind arithKind = op.getKind(); + cir::IntType operandTy = op.getLhs().getType(); + cir::IntType resultTy = op.getResult().getType(); + + EncompassedTypeInfo encompassedTyInfo = + computeEncompassedTypeWidth(operandTy, resultTy); + mlir::IntegerType encompassedLLVMTy = + rewriter.getIntegerType(encompassedTyInfo.width); + + mlir::Value lhs = adaptor.getLhs(); + mlir::Value rhs = adaptor.getRhs(); + if (operandTy.getWidth() < encompassedTyInfo.width) { + if (operandTy.isSigned()) { + lhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, lhs); + rhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, rhs); + } else { + lhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, lhs); + rhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, rhs); + } + } + + std::string intrinName = getLLVMIntrinName(arithKind, encompassedTyInfo.sign, + encompassedTyInfo.width); + auto intrinNameAttr = mlir::StringAttr::get(op.getContext(), intrinName); + + mlir::IntegerType overflowLLVMTy = rewriter.getI1Type(); + auto intrinRetTy = mlir::LLVM::LLVMStructType::getLiteral( + rewriter.getContext(), {encompassedLLVMTy, overflowLLVMTy}); + + auto callLLVMIntrinOp = mlir::LLVM::CallIntrinsicOp::create( + rewriter, loc, intrinRetTy, intrinNameAttr, mlir::ValueRange{lhs, rhs}); + mlir::Value intrinRet = callLLVMIntrinOp.getResult(0); + 
+ mlir::Value result = mlir::LLVM::ExtractValueOp::create( + rewriter, loc, intrinRet, ArrayRef<int64_t>{0}) + .getResult(); + mlir::Value overflow = mlir::LLVM::ExtractValueOp::create( + rewriter, loc, intrinRet, ArrayRef<int64_t>{1}) + .getResult(); + + if (resultTy.getWidth() < encompassedTyInfo.width) { + mlir::Type resultLLVMTy = getTypeConverter()->convertType(resultTy); + auto truncResult = + mlir::LLVM::TruncOp::create(rewriter, loc, resultLLVMTy, result); + + // Extend the truncated result back to the encompassing type to check for + // any overflows during the truncation. + mlir::Value truncResultExt; + if (resultTy.isSigned()) + truncResultExt = mlir::LLVM::SExtOp::create( + rewriter, loc, encompassedLLVMTy, truncResult); + else + truncResultExt = mlir::LLVM::ZExtOp::create( + rewriter, loc, encompassedLLVMTy, truncResult); + auto truncOverflow = mlir::LLVM::ICmpOp::create( + rewriter, loc, mlir::LLVM::ICmpPredicate::ne, truncResultExt, result); + + result = truncResult; + overflow = mlir::LLVM::OrOp::create(rewriter, loc, overflow, truncOverflow); + } + + mlir::Type boolLLVMTy = + getTypeConverter()->convertType(op.getOverflow().getType()); + if (boolLLVMTy != rewriter.getI1Type()) + overflow = mlir::LLVM::ZExtOp::create(rewriter, loc, boolLLVMTy, overflow); + + rewriter.replaceOp(op, mlir::ValueRange{result, overflow}); + + return mlir::success(); +} + +std::string CIRToLLVMBinOpOverflowOpLowering::getLLVMIntrinName( + cir::BinOpOverflowKind opKind, bool isSigned, unsigned width) { + // The intrinsic name is `@llvm.{s|u}{opKind}.with.overflow.i{width}` + + std::string name = "llvm."; + + if (isSigned) + name.push_back('s'); + else + name.push_back('u'); + + switch (opKind) { + case cir::BinOpOverflowKind::Add: + name.append("add."); + break; + case cir::BinOpOverflowKind::Sub: + name.append("sub."); + break; + case cir::BinOpOverflowKind::Mul: + name.append("mul."); + break; + } + + name.append("with.overflow.i"); + name.append(std::to_string(width)); 
+ + return name; +} + +CIRToLLVMBinOpOverflowOpLowering::EncompassedTypeInfo +CIRToLLVMBinOpOverflowOpLowering::computeEncompassedTypeWidth( + cir::IntType operandTy, cir::IntType resultTy) { + bool sign = operandTy.getIsSigned() || resultTy.getIsSigned(); + unsigned width = + std::max(operandTy.getWidth() + (sign && operandTy.isUnsigned()), + resultTy.getWidth() + (sign && resultTy.isUnsigned())); + return {sign, width}; +} + mlir::LogicalResult CIRToLLVMShiftOpLowering::matchAndRewrite( cir::ShiftOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -3100,6 +3321,90 @@ mlir::LogicalResult CIRToLLVMAllocExceptionOpLowering::matchAndRewrite( return mlir::success(); } +static mlir::LLVM::LLVMStructType +getLLVMLandingPadStructTy(mlir::ConversionPatternRewriter &rewriter) { + // Create the landing pad type: struct { ptr, i32 } + mlir::MLIRContext *ctx = rewriter.getContext(); + auto llvmPtr = mlir::LLVM::LLVMPointerType::get(ctx); + llvm::SmallVector<mlir::Type> structFields = {llvmPtr, rewriter.getI32Type()}; + return mlir::LLVM::LLVMStructType::getLiteral(ctx, structFields); +} + +mlir::LogicalResult CIRToLLVMEhInflightOpLowering::matchAndRewrite( + cir::EhInflightOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + auto llvmFn = op->getParentOfType<mlir::LLVM::LLVMFuncOp>(); + assert(llvmFn && "expected LLVM function parent"); + mlir::Block *entryBlock = &llvmFn.getRegion().front(); + assert(entryBlock->isEntryBlock()); + + mlir::ArrayAttr catchListAttr = op.getCatchTypeListAttr(); + mlir::SmallVector<mlir::Value> catchSymAddrs; + + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); + mlir::Location loc = op.getLoc(); + + // %landingpad = landingpad { ptr, i32 } + // Note that since llvm.landingpad has to be the first operation on the + // block, any needed value for its operands has to be added somewhere else. 
+ if (catchListAttr) { + // catch ptr @_ZTIi + // catch ptr @_ZTIPKc + for (mlir::Attribute catchAttr : catchListAttr) { + auto symAttr = cast<mlir::FlatSymbolRefAttr>(catchAttr); + // Generate `llvm.mlir.addressof` for each symbol, and place those + // operations in the LLVM function entry basic block. + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(entryBlock); + mlir::Value addrOp = mlir::LLVM::AddressOfOp::create( + rewriter, loc, llvmPtrTy, symAttr.getValue()); + catchSymAddrs.push_back(addrOp); + } + } else if (!op.getCleanup()) { + // We need to emit catch-all only if cleanup is not set, because when we + // have a catch-all handler, there is no case in which we would unwind past + // the handler. + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(entryBlock); + mlir::Value nullOp = mlir::LLVM::ZeroOp::create(rewriter, loc, llvmPtrTy); + catchSymAddrs.push_back(nullOp); + } + + // %slot = extractvalue { ptr, i32 } %x, 0 + // %selector = extractvalue { ptr, i32 } %x, 1 + mlir::LLVM::LLVMStructType llvmLandingPadStructTy = + getLLVMLandingPadStructTy(rewriter); + auto landingPadOp = mlir::LLVM::LandingpadOp::create( + rewriter, loc, llvmLandingPadStructTy, catchSymAddrs); + + if (op.getCleanup()) + landingPadOp.setCleanup(true); + + mlir::Value slot = + mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 0); + mlir::Value selector = + mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 1); + rewriter.replaceOp(op, mlir::ValueRange{slot, selector}); + + // Landing pads are required to be in LLVM functions with personality + // attribute. + // TODO(cir): for now hardcode personality creation in order to start + // adding exception tests, once we annotate CIR with such information, + // change it to be in FuncOp lowering instead. + mlir::OpBuilder::InsertionGuard guard(rewriter); + // Insert personality decl before the current function. 
+ rewriter.setInsertionPoint(llvmFn); + auto personalityFnTy = + mlir::LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {}, + /*isVarArg=*/true); + + const StringRef fnName = "__gxx_personality_v0"; + createLLVMFuncOpIfNotExist(rewriter, op, fnName, personalityFnTy); + llvmFn.setPersonality(fnName); + + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMTrapOpLowering::matchAndRewrite( cir::TrapOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -3843,6 +4148,12 @@ mlir::LogicalResult CIRToLLVMBlockAddressOpLowering::matchAndRewrite( return mlir::failure(); } +mlir::LogicalResult CIRToLLVMAwaitOpLowering::matchAndRewrite( + cir::AwaitOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + return mlir::failure(); +} + std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() { return std::make_unique<ConvertCIRToLLVMPass>(); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index 0591de545b81d..be6a380372efe 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -12,11 +12,25 @@ #ifndef CLANG_CIR_LOWERTOLLVM_H #define CLANG_CIR_LOWERTOLLVM_H +#include "mlir/Conversion/PatternRewriter.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Transforms/DialectConversion.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" +namespace cir { +class SqrtOp; +} + +class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern<cir::SqrtOp> { +public: + using mlir::OpConversionPattern<cir::SqrtOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override; +}; + namespace cir { namespace direct { diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c new file mode 
100644 index 0000000000000..6e1dace82928c --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c @@ -0,0 +1,46 @@ +// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.) +// RUN: %clang_cc1 -fcir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o - | FileCheck %s + +#include <immintrin.h> + +// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit) +__m128 test_sqrtps(__m128 x) { + return __builtin_ia32_sqrtps(x); +} +// CHECK-LABEL: cir.func @test_sqrtps +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit) +__m256 test_sqrtps256(__m256 x) { + return __builtin_ia32_sqrtps256(x); +} +// CHECK-LABEL: cir.func @test_sqrtps256 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit) +__m512 test_sqrtps512(__m512 x) { + return __builtin_ia32_sqrtps512(x); +} +// CHECK-LABEL: cir.func @test_sqrtps512 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit) +__m128d test_sqrtpd(__m128d x) { + return __builtin_ia32_sqrtpd(x); +} +// CHECK-LABEL: cir.func @test_sqrtpd +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit) +__m256d test_sqrtpd256(__m256d x) { + return __builtin_ia32_sqrtpd256(x); +} +// CHECK-LABEL: cir.func @test_sqrtpd256 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit) +__m512d test_sqrtpd512(__m512d x) { + return __builtin_ia32_sqrtpd512(x); +} +// CHECK-LABEL: cir.func @test_sqrtpd512 +// CHECK: cir.sqrt >From 4a39fd7185cd294b96a4faadc2fa21f2a4d53b6b Mon Sep 17 00:00:00 2001 From: Priyanshu3820 <[email protected]> Date: Sat, 29 Nov 2025 09:59:40 +0530 Subject: [PATCH 2/2] Implement sqrt builtins for all vector sizes --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 20 ++---- .../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 67 +++++++++++++++++++ 2 files changed, 73 insertions(+), 14 deletions(-) create mode 
100644 clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 45c0de322925a..f8a139ec7a8e0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -786,24 +786,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_sqrtpd256: case X86::BI__builtin_ia32_sqrtpd: case X86::BI__builtin_ia32_sqrtps256: - case X86::BI__builtin_ia32_sqrtps: { - mlir::Location loc = getLoc(expr->getExprLoc()); - assert(expr->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument"); - mlir::Value arg = emitScalarExpr(expr->getArg(0)); - mlir::Type argTy = arg.getType(); - if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) { - assert(vecTy.getNumElements() == 4 && - vecTy.getElementType().isa<mlir::FloatType>() && - "__builtin_ia32_sqrtps expects <4 x float> / __m128"); - } - auto sqrt = cir::SqrtOp::create(builder, loc, argTy, arg); - return sqrt.getResult(); - } + case X86::BI__builtin_ia32_sqrtps: case X86::BI__builtin_ia32_sqrtph256: case X86::BI__builtin_ia32_sqrtph: case X86::BI__builtin_ia32_sqrtph512: case X86::BI__builtin_ia32_sqrtps512: - case X86::BI__builtin_ia32_sqrtpd512: + case X86::BI__builtin_ia32_sqrtpd512: { + mlir::Location loc = getLoc(expr->getExprLoc()); + mlir::Value arg = ops[0]; + return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult(); + } case X86::BI__builtin_ia32_pmuludq128: case X86::BI__builtin_ia32_pmuludq256: case X86::BI__builtin_ia32_pmuludq512: diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c new file mode 100644 index 0000000000000..ef5cb954e3efe --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c @@ -0,0 +1,67 @@ +// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.) 
+// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -emit-cir %s -o - | FileCheck %s + +#include <immintrin.h> + +// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit) +__m128 test_sqrtps(__m128 x) { + return __builtin_ia32_sqrtps(x); +} +// CHECK-LABEL: cir.func @test_sqrtps +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit) +__m256 test_sqrtps256(__m256 x) { + return __builtin_ia32_sqrtps256(x); +} +// CHECK-LABEL: cir.func @test_sqrtps256 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit) +__m512 test_sqrtps512(__m512 x) { + return __builtin_ia32_sqrtps512(x); +} +// CHECK-LABEL: cir.func @test_sqrtps512 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit) +__m128d test_sqrtpd(__m128d x) { + return __builtin_ia32_sqrtpd(x); +} +// CHECK-LABEL: cir.func @test_sqrtpd +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit) +__m256d test_sqrtpd256(__m256d x) { + return __builtin_ia32_sqrtpd256(x); +} +// CHECK-LABEL: cir.func @test_sqrtpd256 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit) +__m512d test_sqrtpd512(__m512d x) { + return __builtin_ia32_sqrtpd512(x); +} +// CHECK-LABEL: cir.func @test_sqrtpd512 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit) +__m128h test_sqrtph(__m128h x) { + return __builtin_ia32_sqrtph(x); +} +// CHECK-LABEL: cir.func @test_sqrtph +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit) +__m256h test_sqrtph256(__m256h x) { + return __builtin_ia32_sqrtph256(x); +} +// CHECK-LABEL: cir.func @test_sqrtph256 +// CHECK: cir.sqrt + +// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit) +__m512h test_sqrtph512(__m512h x) { + return __builtin_ia32_sqrtph512(x); +} +// 
CHECK-LABEL: cir.func @test_sqrtph512 +// CHECK: cir.sqrt \ No newline at end of file _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
