[clang] [CIR][X86] Implement lowering for sqrt builtins (PR #169310)

Priyanshu Kumar via cfe-commits Fri, 28 Nov 2025 23:17:52 -0800

https://github.com/Priyanshu3820 updated 
https://github.com/llvm/llvm-project/pull/169310


>From 627bcb3bde64a780ed2b9aaaa9267d97c9679f9c Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <[email protected]>
Date: Wed, 26 Nov 2025 17:45:00 +0530
Subject: [PATCH 1/2] Add CIR sqrt builtin support for X86

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  | 344 +++++++++++++++++-
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  66 +++-
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 323 +++++++++++++++-
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h   |  14 +
 .../CIR/CodeGen/X86/cir-sqrtps-builtins.c     |  46 +++
 5 files changed, 772 insertions(+), 21 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td 
b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index e612d6a0ba886..291b035e6204c 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -802,8 +802,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [
 
//===----------------------------------------------------------------------===//
 
 defvar CIR_YieldableScopes = [
-  "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp",
-  "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp"
+  "ArrayCtor", "ArrayDtor", "AwaitOp", "CaseOp", "DoWhileOp", "ForOp",
+  "GlobalOp", "IfOp", "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp"
 ];
 
 def CIR_YieldOp : CIR_Op<"yield", [
@@ -1640,6 +1640,82 @@ def CIR_CmpOp : CIR_Op<"cmp", [Pure, SameTypeOperands]> {
   let isLLVMLoweringRecursive = true;
 }
 
+//===----------------------------------------------------------------------===//
+// BinOpOverflowOp
+//===----------------------------------------------------------------------===//
+
+def CIR_BinOpOverflowKind : CIR_I32EnumAttr<
+  "BinOpOverflowKind", "checked binary arithmetic operation kind", [
+    I32EnumAttrCase<"Add", 0, "add">,
+    I32EnumAttrCase<"Sub", 1, "sub">,
+    I32EnumAttrCase<"Mul", 2, "mul">
+]>;
+
+def CIR_BinOpOverflowOp : CIR_Op<"binop.overflow", [Pure, SameTypeOperands]> {
+  let summary = "Perform binary integral arithmetic with overflow checking";
+  let description = [{
+    `cir.binop.overflow` performs binary arithmetic operations with overflow
+    checking on integral operands.
+
+    The `kind` argument specifies the kind of arithmetic operation to perform.
+    It can be either `add`, `sub`, or `mul`. The `lhs` and `rhs` arguments
+    specify the input operands of the arithmetic operation. The types of `lhs`
+    and `rhs` must be the same.
+
+    `cir.binop.overflow` produces two SSA values. `result` is the result of the
+    arithmetic operation truncated to its specified type. `overflow` is a
+    boolean value indicating whether overflow happens during the operation.
+
+    The exact semantic of this operation is as follows:
+
+      - `lhs` and `rhs` are promoted to an imaginary integral type that has
+        infinite precision.
+      - The arithmetic operation is performed on the promoted operands.
+      - The infinite-precision result is truncated to the type of `result`. The
+        truncated result is assigned to `result`.
+      - If the truncated result is equal to the un-truncated result, `overflow`
+        is assigned to false. Otherwise, `overflow` is assigned to true.
+  }];
+
+  let arguments = (ins
+    CIR_BinOpOverflowKind:$kind,
+    CIR_IntType:$lhs,
+    CIR_IntType:$rhs
+  );
+
+  let results = (outs CIR_IntType:$result, CIR_BoolType:$overflow);
+
+  let assemblyFormat = [{
+    `(` $kind `,` $lhs `,` $rhs `)` `:` qualified(type($lhs)) `,`
+    `(` qualified(type($result)) `,` qualified(type($overflow)) `)`
+    attr-dict
+  }];
+
+  let builders = [
+    OpBuilder<(ins "cir::IntType":$resultTy,
+                   "cir::BinOpOverflowKind":$kind,
+                   "mlir::Value":$lhs,
+                   "mlir::Value":$rhs), [{
+      auto overflowTy = cir::BoolType::get($_builder.getContext());
+      build($_builder, $_state, resultTy, overflowTy, kind, lhs, rhs);
+    }]>
+  ];
+
+  let extraLLVMLoweringPatternDecl = [{
+    static std::string getLLVMIntrinName(cir::BinOpOverflowKind opKind,
+                                         bool isSigned, unsigned width);
+
+    struct EncompassedTypeInfo {
+      bool sign;
+      unsigned width;
+    };
+
+    static EncompassedTypeInfo computeEncompassedTypeWidth(cir::IntType 
operandTy,
+                                                           cir::IntType 
resultTy);
+  }];
+}
+
+
 
//===----------------------------------------------------------------------===//
 // BinOp
 
//===----------------------------------------------------------------------===//
@@ -2533,7 +2609,9 @@ def CIR_FuncOp : CIR_Op<"func", [
                        OptionalAttr<DictArrayAttr>:$res_attrs,
                        OptionalAttr<FlatSymbolRefAttr>:$aliasee,
                        CIR_OptionalPriorityAttr:$global_ctor_priority,
-                       CIR_OptionalPriorityAttr:$global_dtor_priority);
+                       CIR_OptionalPriorityAttr:$global_dtor_priority,
+                       
OptionalAttr<CIR_CXXSpecialMemberAttr>:$cxx_special_member
+   );
 
   let regions = (region AnyRegion:$body);
 
@@ -2572,7 +2650,32 @@ def CIR_FuncOp : CIR_Op<"func", [
     
//===------------------------------------------------------------------===//
 
     bool isDeclaration();
-  }];
+
+    
//===------------------------------------------------------------------===//
+    // C++ Special Member Functions
+    
//===------------------------------------------------------------------===//
+
+    /// Returns true if this function is a C++ special member function.
+    bool isCXXSpecialMemberFunction();
+
+    bool isCxxConstructor();
+    bool isCxxDestructor();
+
+    /// Returns true if this function is a copy or move assignment operator.
+    bool isCxxSpecialAssignment();
+
+    /// Returns the kind of constructor this function represents, if any.
+    std::optional<CtorKind> getCxxConstructorKind();
+
+    /// Returns the kind of assignment operator (move, copy) this function
+    /// represents, if any.
+    std::optional<AssignKind> getCxxSpecialAssignKind();
+
+    /// Returns true if the function is a trivial C++ member functions such as
+    /// trivial default constructor, copy/move constructor, copy/move 
assignment,
+    /// or destructor.
+    bool isCxxTrivialMemberFunction();
+}];
 
   let hasCustomAssemblyFormat = 1;
   let hasVerifier = 1;
@@ -2752,6 +2855,100 @@ def CIR_CallOp : CIR_CallOpBase<"call", 
[NoRegionArguments]> {
   ];
 }
 
+//===----------------------------------------------------------------------===//
+// AwaitOp
+//===----------------------------------------------------------------------===//
+
+def CIR_AwaitKind : CIR_I32EnumAttr<"AwaitKind", "await kind", [
+  I32EnumAttrCase<"Init", 0, "init">,
+  I32EnumAttrCase<"User", 1, "user">,
+  I32EnumAttrCase<"Yield", 2, "yield">,
+  I32EnumAttrCase<"Final", 3, "final">
+]>;
+
+def CIR_AwaitOp : CIR_Op<"await",[
+  DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+  RecursivelySpeculatable, NoRegionArguments
+]> {
+  let summary = "Wraps C++ co_await implicit logic";
+  let description = [{
+    The under the hood effect of using C++ `co_await expr` roughly
+    translates to:
+
+    ```c++
+    // co_await expr;
+
+    auto &&x = CommonExpr();
+    if (!x.await_ready()) {
+       ...
+       x.await_suspend(...);
+       ...
+    }
+    x.await_resume();
+    ```
+
+    `cir.await` represents this logic by using 3 regions:
+      - ready: covers veto power from x.await_ready()
+      - suspend: wraps actual x.await_suspend() logic
+      - resume: handles x.await_resume()
+
+    Breaking this up in regions allows individual scrutiny of conditions
+    which might lead to folding some of them out. Lowerings coming out
+    of CIR, e.g. LLVM, should use the `suspend` region to track more
+    lower level codegen (e.g. intrinsic emission for coro.save/coro.suspend).
+
+    There are also 4 flavors of `cir.await` available:
+    - `init`: compiler generated initial suspend via implicit `co_await`.
+    - `user`: also known as normal, representing a user written `co_await`.
+    - `yield`: user written `co_yield` expressions.
+    - `final`: compiler generated final suspend via implicit `co_await`.
+
+    ```mlir
+      cir.scope {
+        ... // auto &&x = CommonExpr();
+        cir.await(user, ready : {
+          ... // x.await_ready()
+        }, suspend : {
+          ... // x.await_suspend()
+        }, resume : {
+          ... // x.await_resume()
+        })
+      }
+    ```
+
+    Note that resulution of the common expression is assumed to happen
+    as part of the enclosing await scope.
+  }];
+
+  let arguments = (ins CIR_AwaitKind:$kind);
+  let regions = (region SizedRegion<1>:$ready,
+                        SizedRegion<1>:$suspend,
+                        SizedRegion<1>:$resume);
+  let assemblyFormat = [{
+    `(` $kind `,`
+    `ready` `:` $ready `,`
+    `suspend` `:` $suspend `,`
+    `resume` `:` $resume `,`
+    `)`
+    attr-dict
+  }];
+
+  let skipDefaultBuilders = 1;
+  let builders = [
+    OpBuilder<(ins
+      "cir::AwaitKind":$kind,
+      CArg<"BuilderCallbackRef",
+           "nullptr">:$readyBuilder,
+      CArg<"BuilderCallbackRef",
+           "nullptr">:$suspendBuilder,
+      CArg<"BuilderCallbackRef",
+           "nullptr">:$resumeBuilder
+      )>
+  ];
+
+  let hasVerifier = 1;
+}
+
 
//===----------------------------------------------------------------------===//
 // CopyOp
 
//===----------------------------------------------------------------------===//
@@ -2988,6 +3185,39 @@ def CIR_InlineAsmOp : CIR_Op<"asm", 
[RecursiveMemoryEffects]> {
   let hasCustomAssemblyFormat = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// SqrtOp
+//===----------------------------------------------------------------------===//
+
+def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> {
+  let summary = "Floating-point square root";
+
+  let description = [{
+    The `cir.sqrt` operation computes the element-wise square root of its 
input.
+
+    The input must be either:
+      • a floating-point scalar type, or
+      • a vector whose element type is floating-point.
+
+    The result type must match the input type exactly.
+
+    Examples:
+      // scalar
+      %r = cir.sqrt %x : !cir.fp64
+
+      // vector
+      %v = cir.sqrt %vec : !cir.vector<!cir.fp32 x 4>
+  }];
+
+  // input and output types: float or vector-of-float
+  let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input);
+  let results   = (outs CIR_AnyFloatOrVecOfFloatType:$result);
+
+  let assemblyFormat = [{
+    $input `:` type($input) attr-dict
+  }];
+}
+
 
//===----------------------------------------------------------------------===//
 // UnreachableOp
 
//===----------------------------------------------------------------------===//
@@ -4018,6 +4248,72 @@ def CIR_RotateOp : CIR_Op<"rotate", [Pure, 
SameOperandsAndResultType]> {
   let hasFolder = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// FPClass Test Flags
+//===----------------------------------------------------------------------===//
+
+def FPClassTestEnum : CIR_I32EnumAttr<"FPClassTest", "floating-point class 
test flags", [
+  // Basic flags
+  I32EnumAttrCase<"SignalingNaN", 1, "fcSNan">,
+  I32EnumAttrCase<"QuietNaN", 2, "fcQNan">,
+  I32EnumAttrCase<"NegativeInfinity", 4, "fcNegInf">,
+  I32EnumAttrCase<"NegativeNormal", 8, "fcNegNormal">,
+  I32EnumAttrCase<"NegativeSubnormal", 16, "fcNegSubnormal">,
+  I32EnumAttrCase<"NegativeZero", 32, "fcNegZero">,
+  I32EnumAttrCase<"PositiveZero", 64, "fcPosZero">,
+  I32EnumAttrCase<"PositiveSubnormal", 128, "fcPosSubnormal">,
+  I32EnumAttrCase<"PositiveNormal", 256, "fcPosNormal">,
+  I32EnumAttrCase<"PositiveInfinity", 512, "fcPosInf">,
+
+  // Composite flags
+  I32EnumAttrCase<"Nan", 3, "fcNan">,                   // fcSNan | fcQNan
+  I32EnumAttrCase<"Infinity", 516, "fcInf">,            // fcPosInf | fcNegInf
+  I32EnumAttrCase<"Normal", 264, "fcNormal">,           // fcPosNormal | 
fcNegNormal
+  I32EnumAttrCase<"Subnormal", 144, "fcSubnormal">,     // fcPosSubnormal | 
fcNegSubnormal
+  I32EnumAttrCase<"Zero", 96, "fcZero">,                // fcPosZero | 
fcNegZero
+  I32EnumAttrCase<"PositiveFinite", 448, "fcPosFinite">,// fcPosNormal | 
fcPosSubnormal | fcPosZero
+  I32EnumAttrCase<"NegativeFinite", 56, "fcNegFinite">, // fcNegNormal | 
fcNegSubnormal | fcNegZero
+  I32EnumAttrCase<"Finite", 504, "fcFinite">,           // fcPosFinite | 
fcNegFinite
+  I32EnumAttrCase<"Positive", 960, "fcPositive">,       // fcPosFinite | 
fcPosInf
+  I32EnumAttrCase<"Negative", 60, "fcNegative">,        // fcNegFinite | 
fcNegInf
+  I32EnumAttrCase<"All", 1023, "fcAllFlags">,           // fcNan | fcInf | 
fcFinite
+]> {
+  let cppNamespace = "::cir";
+}
+
+def CIR_IsFPClassOp : CIR_Op<"is_fp_class"> {
+  let summary = "Corresponding to the `__builtin_fpclassify` builtin function 
in clang";
+
+  let description = [{
+    The `cir.is_fp_class` operation takes a floating-point value as its first
+    argument and a bitfield of flags as its second argument. The operation
+    returns a boolean value indicating whether the floating-point value
+    satisfies the given flags.
+
+    The flags must be a compile time constant and the values are:
+
+    | Bit # | floating-point class |
+    | ----- | -------------------- |
+    |  0    | Signaling NaN        |
+    |  1    | Quiet NaN            |
+    |  2    | Negative infinity    |
+    |  3    | Negative normal      |
+    |  4    | Negative subnormal   |
+    |  5    | Negative zero        |
+    |  6    | Positive zero        |
+    |  7    | Positive subnormal   |
+    |  8    | Positive normal      |
+    |  9    | Positive infinity    |
+  }];
+
+  let arguments = (ins CIR_AnyFloatType:$src,
+                       FPClassTestEnum:$flags);
+  let results = (outs CIR_BoolType:$result);
+  let assemblyFormat = [{
+    $src `,` $flags `:` functional-type($src, $result) attr-dict
+  }];
+}
+
 
//===----------------------------------------------------------------------===//
 // Assume Operations
 
//===----------------------------------------------------------------------===//
@@ -4202,7 +4498,7 @@ def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> {
     When the `min` attribute is present, the operation returns the minimum
     guaranteed accessible size. When absent (max mode), it returns the maximum
     possible object size. Corresponds to `llvm.objectsize`'s `min` argument.
-    
+
     The `dynamic` attribute determines if the value should be evaluated at
     runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument.
 
@@ -4658,6 +4954,44 @@ def CIR_TryOp : CIR_Op<"try",[
   let hasLLVMLowering = false;
 }
 
+//===----------------------------------------------------------------------===//
+// Exception related: EhInflightOp
+//===----------------------------------------------------------------------===//
+
+def CIR_EhInflightOp : CIR_Op<"eh.inflight_exception"> {
+  let summary = "Materialize the catch clause formal parameter";
+  let description = [{
+    `cir.eh.inflight_exception` returns two values:
+      - `exception_ptr`: The exception pointer for the inflight exception
+      - `type_id`: the type info index for the exception type
+    This operation is expected to be the first operation in the unwind
+    destination basic blocks of a `cir.try_call` operation.
+
+    The `cleanup` attribute indicates that clean up code must be run before the
+    values produced by this operation are used to dispatch the exception. This
+    cleanup code must be executed even if the exception is not caught.
+    This helps CIR to pass down more accurate information for LLVM lowering
+    to landingpads.
+
+    Example:
+
+    ```mlir
+    %exception_ptr, %type_id = cir.eh.inflight_exception
+    %exception_ptr, %type_id = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc]
+    %exception_ptr, %type_id = cir.eh.inflight_exception cleanup
+    ``
+  }];
+
+  let arguments = (ins UnitAttr:$cleanup,
+                       OptionalAttr<FlatSymbolRefArrayAttr>:$catch_type_list);
+  let results = (outs CIR_VoidPtrType:$exception_ptr, CIR_UInt32:$type_id);
+  let assemblyFormat = [{
+    (`cleanup` $cleanup^)?
+    ($catch_type_list^)?
+    attr-dict
+  }];
+}
+
 
//===----------------------------------------------------------------------===//
 // Atomic operations
 
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index ee6900141647f..e91a9e4db229a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -121,20 +121,36 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
     return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy);
   case X86::BI_mm_prefetch:
   case X86::BI__rdtsc:
-  case X86::BI__builtin_ia32_rdtscp:
+  case X86::BI__builtin_ia32_rdtscp: {
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  }
   case X86::BI__builtin_ia32_lzcnt_u16:
   case X86::BI__builtin_ia32_lzcnt_u32:
-  case X86::BI__builtin_ia32_lzcnt_u64:
+  case X86::BI__builtin_ia32_lzcnt_u64: {
+    mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc()));
+    return emitIntrinsicCallOp(*this, expr, "ctlz", ops[0].getType(),
+                               mlir::ValueRange{ops[0], isZeroPoison});
+  }
   case X86::BI__builtin_ia32_tzcnt_u16:
   case X86::BI__builtin_ia32_tzcnt_u32:
-  case X86::BI__builtin_ia32_tzcnt_u64:
+  case X86::BI__builtin_ia32_tzcnt_u64: {
+    mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc()));
+    return emitIntrinsicCallOp(*this, expr, "cttz", ops[0].getType(),
+                               mlir::ValueRange{ops[0], isZeroPoison});
+  }
   case X86::BI__builtin_ia32_undef128:
   case X86::BI__builtin_ia32_undef256:
   case X86::BI__builtin_ia32_undef512:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
+    // The x86 definition of "undef" is not the same as the LLVM definition
+    // (PR32176). We leave optimizing away an unnecessary zero constant to the
+    // IR optimizer and backend.
+    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+    //  value, we should use that here instead of a zero.
+    return builder.getNullValue(convertType(expr->getType()),
+                                getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_vec_ext_v4hi:
   case X86::BI__builtin_ia32_vec_ext_v16qi:
   case X86::BI__builtin_ia32_vec_ext_v8hi:
@@ -169,10 +185,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_vec_set_v16hi:
   case X86::BI__builtin_ia32_vec_set_v8si:
   case X86::BI__builtin_ia32_vec_set_v4di:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI_mm_setcsr:
-  case X86::BI__builtin_ia32_ldmxcsr:
+  case X86::BI__builtin_ia32_ldmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
+    builder.createStore(loc, ops[0], tmp);
+    return emitIntrinsicCallOp(*this, expr, "x86.sse.ldmxcsr",
+                               builder.getVoidTy(), tmp.getPointer());
+  }
   case X86::BI_mm_getcsr:
-  case X86::BI__builtin_ia32_stmxcsr:
+  case X86::BI__builtin_ia32_stmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getType(), loc);
+    emitIntrinsicCallOp(*this, expr, "x86.sse.stmxcsr", builder.getVoidTy(),
+                        tmp.getPointer());
+    return builder.createLoad(loc, tmp);
+  }
   case X86::BI__builtin_ia32_xsave:
   case X86::BI__builtin_ia32_xsave64:
   case X86::BI__builtin_ia32_xrstor:
@@ -681,10 +713,24 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_sqrtsh_round_mask:
   case X86::BI__builtin_ia32_sqrtsd_round_mask:
   case X86::BI__builtin_ia32_sqrtss_round_mask:
+    errorNYI("masked round sqrt builtins");
+    return {};
   case X86::BI__builtin_ia32_sqrtpd256:
   case X86::BI__builtin_ia32_sqrtpd:
   case X86::BI__builtin_ia32_sqrtps256:
-  case X86::BI__builtin_ia32_sqrtps:
+  case X86::BI__builtin_ia32_sqrtps: {
+    mlir::Location loc = getLoc(E->getExprLoc());
+    assert(E->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument");
+    mlir::Value arg = emitScalarExpr(E->getArg(0));
+    mlir::Type argTy = arg.getType();
+    if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) {
+      assert(vecTy.getNumElements() == 4 &&
+             vecTy.getElementType().isa<mlir::FloatType>() &&
+             "__builtin_ia32_sqrtps expects <4 x float> / __m128");
+    }
+    auto sqrt = cir::SqrtOp > ::create(builder, loc, argTy, arg);
+    return sqrt.getResult();
+  }
   case X86::BI__builtin_ia32_sqrtph256:
   case X86::BI__builtin_ia32_sqrtph:
   case X86::BI__builtin_ia32_sqrtph512:
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index d43a462a25092..937c66082ca40 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR 
---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR 
---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -30,6 +30,7 @@
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 #include "mlir/Transforms/DialectConversion.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
@@ -44,6 +45,96 @@
 
 using namespace cir;
 using namespace llvm;
+using namespace mlir;
+
+static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
+  std::string s;
+  {
+    llvm::raw_string_ostream os(s);
+    llvm::Type *unused = nullptr;
+    os << llvmTy;
+  }
+  if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
+  }
+  return s;
+}
+
+// Actual lowering
+LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
+    cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
+    ConversionPatternRewriter &rewriter) const {
+
+  Location loc = op.getLoc();
+  MLIRContext *ctx = rewriter.getContext();
+
+  Type cirResTy = op.getResult().getType();
+  Type llvmResTy = getTypeConverter()->convertType(cirResTy);
+  if (!llvmResTy)
+    return op.emitOpError(
+        "expected LLVM dialect result type for cir.sqrt lowering");
+
+  Value operand = adaptor.getInput();
+  Value llvmOperand = operand;
+  if (operand.getType() != llvmResTy) {
+    llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
+  }
+
+  // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
+  std::string intrinsicName = "llvm.sqrt.";
+  std::string suffix;
+
+  // If the CIR result type is a vector, include the 'vN' part in the suffix.
+  if (auto vec = cirResTy.dyn_cast<cir::VectorType>()) {
+    Type elt = vec.getElementType();
+    if (auto f = elt.dyn_cast<cir::FloatType>()) {
+      unsigned width = f.getWidth();
+      unsigned n = vec.getNumElements();
+      if (width == 32)
+        suffix = "v" + std::to_string(n) + "f32";
+      else if (width == 64)
+        suffix = "v" + std::to_string(n) + "f64";
+      else if (width == 16)
+        suffix = "v" + std::to_string(n) + "f16";
+      else
+        return op.emitOpError("unsupported float width for sqrt");
+    } else {
+      return op.emitOpError("vector element must be floating point for sqrt");
+    }
+  } else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
+    // Scalar float
+    unsigned width = f.getWidth();
+    if (width == 32)
+      suffix = "f32";
+    else if (width == 64)
+      suffix = "f64";
+    else if (width == 16)
+      suffix = "f16";
+    else
+      return op.emitOpError("unsupported float width for sqrt");
+  } else {
+    return op.emitOpError("unsupported type for cir.sqrt lowering");
+  }
+
+  intrinsicName += suffix;
+
+  // Ensure the llvm intrinsic function exists at module scope. Insert it at
+  // the start of the module body using an insertion guard.
+  ModuleOp module = op->getParentOfType<ModuleOp>();
+  if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
+    OpBuilder::InsertionGuard guard(rewriter);
+    rewriter.setInsertionPointToStart(module.getBody());
+    auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmResTy, {llvmResTy},
+                                                    /*isVarArg=*/false);
+    rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
+  }
+
+  // Create the call and replace cir.sqrt
+  auto callee = SymbolRefAttr::get(ctx, intrinsicName);
+  rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, llvmResTy, callee,
+                                            ArrayRef<Value>{llvmOperand});
+
+  return mlir::success();
+}
 
 namespace cir {
 namespace direct {
@@ -284,7 +375,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, 
bool isNothrow,
     memoryEffect = mlir::LLVM::MemoryEffectsAttr::get(
         callOp->getContext(), /*other=*/ModRefInfo::Ref,
         /*argMem=*/ModRefInfo::Ref,
-        /*inaccessibleMem=*/ModRefInfo::Ref);
+        /*inaccessibleMem=*/ModRefInfo::Ref,
+        /*errnoMem=*/ModRefInfo::Ref,
+        /*targetMem0=*/ModRefInfo::Ref,
+        /*targetMem1=*/ModRefInfo::Ref);
     noUnwind = true;
     willReturn = true;
     break;
@@ -293,7 +387,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, 
bool isNothrow,
     memoryEffect = mlir::LLVM::MemoryEffectsAttr::get(
         callOp->getContext(), /*other=*/ModRefInfo::NoModRef,
         /*argMem=*/ModRefInfo::NoModRef,
-        /*inaccessibleMem=*/ModRefInfo::NoModRef);
+        /*inaccessibleMem=*/ModRefInfo::NoModRef,
+        /*errnoMem=*/ModRefInfo::NoModRef,
+        /*targetMem0=*/ModRefInfo::NoModRef,
+        /*targetMem1=*/ModRefInfo::NoModRef);
     noUnwind = true;
     willReturn = true;
     break;
@@ -670,6 +767,18 @@ mlir::LogicalResult 
CIRToLLVMASinOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMIsFPClassOpLowering::matchAndRewrite(
+    cir::IsFPClassOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Value src = adaptor.getSrc();
+  cir::FPClassTest flags = adaptor.getFlags();
+  mlir::IntegerType retTy = rewriter.getI1Type();
+
+  rewriter.replaceOpWithNewOp<mlir::LLVM::IsFPClass>(
+      op, retTy, src, static_cast<uint32_t>(flags));
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMAssumeOpLowering::matchAndRewrite(
     cir::AssumeOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -1995,7 +2104,6 @@ void 
CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp(
   //        attributes are available on cir.global ops. This duplicates code
   //        in CIRToLLVMGlobalOpLowering::matchAndRewrite() but that will go
   //        away when the placeholders are no longer needed.
-  assert(!cir::MissingFeatures::opGlobalConstant());
   const bool isConst = op.getConstant();
   assert(!cir::MissingFeatures::addressSpace());
   const unsigned addrSpace = 0;
@@ -2055,8 +2163,7 @@ mlir::LogicalResult 
CIRToLLVMGlobalOpLowering::matchAndRewrite(
       convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType);
   // FIXME: These default values are placeholders until the the equivalent
   //        attributes are available on cir.global ops.
-  assert(!cir::MissingFeatures::opGlobalConstant());
-  const bool isConst = false;
+  const bool isConst = op.getConstant();
   assert(!cir::MissingFeatures::addressSpace());
   const unsigned addrSpace = 0;
   const bool isDsoLocal = op.getDsoLocal();
@@ -2570,6 +2677,120 @@ mlir::LogicalResult 
CIRToLLVMCmpOpLowering::matchAndRewrite(
   return cmpOp.emitError() << "unsupported type for CmpOp: " << type;
 }
 
+mlir::LogicalResult CIRToLLVMBinOpOverflowOpLowering::matchAndRewrite(
+    cir::BinOpOverflowOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Location loc = op.getLoc();
+  cir::BinOpOverflowKind arithKind = op.getKind();
+  cir::IntType operandTy = op.getLhs().getType();
+  cir::IntType resultTy = op.getResult().getType();
+
+  EncompassedTypeInfo encompassedTyInfo =
+      computeEncompassedTypeWidth(operandTy, resultTy);
+  mlir::IntegerType encompassedLLVMTy =
+      rewriter.getIntegerType(encompassedTyInfo.width);
+
+  mlir::Value lhs = adaptor.getLhs();
+  mlir::Value rhs = adaptor.getRhs();
+  if (operandTy.getWidth() < encompassedTyInfo.width) {
+    if (operandTy.isSigned()) {
+      lhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, lhs);
+      rhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, rhs);
+    } else {
+      lhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, lhs);
+      rhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, rhs);
+    }
+  }
+
+  std::string intrinName = getLLVMIntrinName(arithKind, encompassedTyInfo.sign,
+                                             encompassedTyInfo.width);
+  auto intrinNameAttr = mlir::StringAttr::get(op.getContext(), intrinName);
+
+  mlir::IntegerType overflowLLVMTy = rewriter.getI1Type();
+  auto intrinRetTy = mlir::LLVM::LLVMStructType::getLiteral(
+      rewriter.getContext(), {encompassedLLVMTy, overflowLLVMTy});
+
+  auto callLLVMIntrinOp = mlir::LLVM::CallIntrinsicOp::create(
+      rewriter, loc, intrinRetTy, intrinNameAttr, mlir::ValueRange{lhs, rhs});
+  mlir::Value intrinRet = callLLVMIntrinOp.getResult(0);
+
+  mlir::Value result = mlir::LLVM::ExtractValueOp::create(
+                           rewriter, loc, intrinRet, ArrayRef<int64_t>{0})
+                           .getResult();
+  mlir::Value overflow = mlir::LLVM::ExtractValueOp::create(
+                             rewriter, loc, intrinRet, ArrayRef<int64_t>{1})
+                             .getResult();
+
+  if (resultTy.getWidth() < encompassedTyInfo.width) {
+    mlir::Type resultLLVMTy = getTypeConverter()->convertType(resultTy);
+    auto truncResult =
+        mlir::LLVM::TruncOp::create(rewriter, loc, resultLLVMTy, result);
+
+    // Extend the truncated result back to the encompassing type to check for
+    // any overflows during the truncation.
+    mlir::Value truncResultExt;
+    if (resultTy.isSigned())
+      truncResultExt = mlir::LLVM::SExtOp::create(
+          rewriter, loc, encompassedLLVMTy, truncResult);
+    else
+      truncResultExt = mlir::LLVM::ZExtOp::create(
+          rewriter, loc, encompassedLLVMTy, truncResult);
+    auto truncOverflow = mlir::LLVM::ICmpOp::create(
+        rewriter, loc, mlir::LLVM::ICmpPredicate::ne, truncResultExt, result);
+
+    result = truncResult;
+    overflow = mlir::LLVM::OrOp::create(rewriter, loc, overflow, 
truncOverflow);
+  }
+
+  mlir::Type boolLLVMTy =
+      getTypeConverter()->convertType(op.getOverflow().getType());
+  if (boolLLVMTy != rewriter.getI1Type())
+    overflow = mlir::LLVM::ZExtOp::create(rewriter, loc, boolLLVMTy, overflow);
+
+  rewriter.replaceOp(op, mlir::ValueRange{result, overflow});
+
+  return mlir::success();
+}
+
+std::string CIRToLLVMBinOpOverflowOpLowering::getLLVMIntrinName(
+    cir::BinOpOverflowKind opKind, bool isSigned, unsigned width) {
+  // The intrinsic name is `@llvm.{s|u}{opKind}.with.overflow.i{width}`
+
+  std::string name = "llvm.";
+
+  if (isSigned)
+    name.push_back('s');
+  else
+    name.push_back('u');
+
+  switch (opKind) {
+  case cir::BinOpOverflowKind::Add:
+    name.append("add.");
+    break;
+  case cir::BinOpOverflowKind::Sub:
+    name.append("sub.");
+    break;
+  case cir::BinOpOverflowKind::Mul:
+    name.append("mul.");
+    break;
+  }
+
+  name.append("with.overflow.i");
+  name.append(std::to_string(width));
+
+  return name;
+}
+
+CIRToLLVMBinOpOverflowOpLowering::EncompassedTypeInfo
+CIRToLLVMBinOpOverflowOpLowering::computeEncompassedTypeWidth(
+    cir::IntType operandTy, cir::IntType resultTy) {
+  bool sign = operandTy.getIsSigned() || resultTy.getIsSigned();
+  unsigned width =
+      std::max(operandTy.getWidth() + (sign && operandTy.isUnsigned()),
+               resultTy.getWidth() + (sign && resultTy.isUnsigned()));
+  return {sign, width};
+}
+
 mlir::LogicalResult CIRToLLVMShiftOpLowering::matchAndRewrite(
     cir::ShiftOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -3100,6 +3321,90 @@ mlir::LogicalResult 
CIRToLLVMAllocExceptionOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+static mlir::LLVM::LLVMStructType
+getLLVMLandingPadStructTy(mlir::ConversionPatternRewriter &rewriter) {
+  // Create the landing pad type: struct { ptr, i32 }
+  mlir::MLIRContext *ctx = rewriter.getContext();
+  auto llvmPtr = mlir::LLVM::LLVMPointerType::get(ctx);
+  llvm::SmallVector<mlir::Type> structFields = {llvmPtr, 
rewriter.getI32Type()};
+  return mlir::LLVM::LLVMStructType::getLiteral(ctx, structFields);
+}
+
+mlir::LogicalResult CIRToLLVMEhInflightOpLowering::matchAndRewrite(
+    cir::EhInflightOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  auto llvmFn = op->getParentOfType<mlir::LLVM::LLVMFuncOp>();
+  assert(llvmFn && "expected LLVM function parent");
+  mlir::Block *entryBlock = &llvmFn.getRegion().front();
+  assert(entryBlock->isEntryBlock());
+
+  mlir::ArrayAttr catchListAttr = op.getCatchTypeListAttr();
+  mlir::SmallVector<mlir::Value> catchSymAddrs;
+
+  auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext());
+  mlir::Location loc = op.getLoc();
+
+  // %landingpad = landingpad { ptr, i32 }
+  // Note that since llvm.landingpad has to be the first operation on the
+  // block, any needed value for its operands has to be added somewhere else.
+  if (catchListAttr) {
+    //   catch ptr @_ZTIi
+    //   catch ptr @_ZTIPKc
+    for (mlir::Attribute catchAttr : catchListAttr) {
+      auto symAttr = cast<mlir::FlatSymbolRefAttr>(catchAttr);
+      // Generate `llvm.mlir.addressof` for each symbol, and place those
+      // operations in the LLVM function entry basic block.
+      mlir::OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPointToStart(entryBlock);
+      mlir::Value addrOp = mlir::LLVM::AddressOfOp::create(
+          rewriter, loc, llvmPtrTy, symAttr.getValue());
+      catchSymAddrs.push_back(addrOp);
+    }
+  } else if (!op.getCleanup()) {
+    // We need to emit catch-all only if cleanup is not set, because when we
+    // have catch-all handler, there is no case when we set would unwind past
+    // the handler
+    mlir::OpBuilder::InsertionGuard guard(rewriter);
+    rewriter.setInsertionPointToStart(entryBlock);
+    mlir::Value nullOp = mlir::LLVM::ZeroOp::create(rewriter, loc, llvmPtrTy);
+    catchSymAddrs.push_back(nullOp);
+  }
+
+  // %slot = extractvalue { ptr, i32 } %x, 0
+  // %selector = extractvalue { ptr, i32 } %x, 1
+  mlir::LLVM::LLVMStructType llvmLandingPadStructTy =
+      getLLVMLandingPadStructTy(rewriter);
+  auto landingPadOp = mlir::LLVM::LandingpadOp::create(
+      rewriter, loc, llvmLandingPadStructTy, catchSymAddrs);
+
+  if (op.getCleanup())
+    landingPadOp.setCleanup(true);
+
+  mlir::Value slot =
+      mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 0);
+  mlir::Value selector =
+      mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 1);
+  rewriter.replaceOp(op, mlir::ValueRange{slot, selector});
+
+  // Landing pads are required to be in LLVM functions with personality
+  // attribute.
+  // TODO(cir): for now hardcode personality creation in order to start
+  // adding exception tests, once we annotate CIR with such information,
+  // change it to be in FuncOp lowering instead.
+  mlir::OpBuilder::InsertionGuard guard(rewriter);
+  // Insert personality decl before the current function.
+  rewriter.setInsertionPoint(llvmFn);
+  auto personalityFnTy =
+      mlir::LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {},
+                                        /*isVarArg=*/true);
+
+  const StringRef fnName = "__gxx_personality_v0";
+  createLLVMFuncOpIfNotExist(rewriter, op, fnName, personalityFnTy);
+  llvmFn.setPersonality(fnName);
+
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMTrapOpLowering::matchAndRewrite(
     cir::TrapOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -3843,6 +4148,12 @@ mlir::LogicalResult 
CIRToLLVMBlockAddressOpLowering::matchAndRewrite(
   return mlir::failure();
 }
 
+mlir::LogicalResult CIRToLLVMAwaitOpLowering::matchAndRewrite(
+    cir::AwaitOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  return mlir::failure();
+}
+
 std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() {
   return std::make_unique<ConvertCIRToLLVMPass>();
 }
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 0591de545b81d..be6a380372efe 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -12,11 +12,25 @@
 #ifndef CLANG_CIR_LOWERTOLLVM_H
 #define CLANG_CIR_LOWERTOLLVM_H
 
+#include "mlir/Conversion/PatternRewriter.h"
 #include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 
+namespace cir {
+class SqrtOp;
+}
+
+class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern<cir::SqrtOp> {
+public:
+  using mlir::OpConversionPattern<cir::SqrtOp>::OpConversionPattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override;
+};
+
 namespace cir {
 
 namespace direct {
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c 
b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
new file mode 100644
index 0000000000000..6e1dace82928c
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
@@ -0,0 +1,46 @@
+// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.)
+// RUN: %clang_cc1 -fcir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o 
- | FileCheck %s
+
+#include <immintrin.h>
+
+// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
+__m128 test_sqrtps(__m128 x) {
+  return __builtin_ia32_sqrtps(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
+__m256 test_sqrtps256(__m256 x) {
+  return __builtin_ia32_sqrtps256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
+__m512 test_sqrtps512(__m512 x) {
+  return __builtin_ia32_sqrtps512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
+__m128d test_sqrtpd(__m128d x) {
+  return __builtin_ia32_sqrtpd(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
+__m256d test_sqrtpd256(__m256d x) {
+  return __builtin_ia32_sqrtpd256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
+__m512d test_sqrtpd512(__m512d x) {
+  return __builtin_ia32_sqrtpd512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd512
+// CHECK: cir.sqrt

>From 4a39fd7185cd294b96a4faadc2fa21f2a4d53b6b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <[email protected]>
Date: Sat, 29 Nov 2025 09:59:40 +0530
Subject: [PATCH 2/2] Implement sqrt builtins for all vector sizes

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 20 ++----
 .../test/CIR/CodeGen/X86/cir-sqrt-builtins.c  | 67 +++++++++++++++++++
 2 files changed, 73 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 45c0de322925a..f8a139ec7a8e0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -786,24 +786,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_sqrtpd256:
   case X86::BI__builtin_ia32_sqrtpd:
   case X86::BI__builtin_ia32_sqrtps256:
-  case X86::BI__builtin_ia32_sqrtps: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    assert(expr->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one 
argument");
-    mlir::Value arg = emitScalarExpr(expr->getArg(0));
-    mlir::Type argTy = arg.getType();
-    if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) {
-      assert(vecTy.getNumElements() == 4 &&
-             vecTy.getElementType().isa<mlir::FloatType>() &&
-             "__builtin_ia32_sqrtps expects <4 x float> / __m128");
-    }
-    auto sqrt = cir::SqrtOp::create(builder, loc, argTy, arg);
-    return sqrt.getResult();
-  }
+  case X86::BI__builtin_ia32_sqrtps:
   case X86::BI__builtin_ia32_sqrtph256:
   case X86::BI__builtin_ia32_sqrtph:
   case X86::BI__builtin_ia32_sqrtph512:
   case X86::BI__builtin_ia32_sqrtps512:
-  case X86::BI__builtin_ia32_sqrtpd512:
+  case X86::BI__builtin_ia32_sqrtpd512: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value arg = ops[0];
+    return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+  }
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:
   case X86::BI__builtin_ia32_pmuludq512:
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c 
b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
new file mode 100644
index 0000000000000..ef5cb954e3efe
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -0,0 +1,67 @@
+// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.)
+// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature 
+avx512fp16 -emit-cir %s -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
+__m128 test_sqrtps(__m128 x) {
+  return __builtin_ia32_sqrtps(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
+__m256 test_sqrtps256(__m256 x) {
+  return __builtin_ia32_sqrtps256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
+__m512 test_sqrtps512(__m512 x) {
+  return __builtin_ia32_sqrtps512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
+__m128d test_sqrtpd(__m128d x) {
+  return __builtin_ia32_sqrtpd(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
+__m256d test_sqrtpd256(__m256d x) {
+  return __builtin_ia32_sqrtpd256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
+__m512d test_sqrtpd512(__m512d x) {
+  return __builtin_ia32_sqrtpd512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit)
+__m128h test_sqrtph(__m128h x) {
+  return __builtin_ia32_sqrtph(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit)
+__m256h test_sqrtph256(__m256h x) {
+  return __builtin_ia32_sqrtph256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit)
+__m512h test_sqrtph512(__m512h x) {
+  return __builtin_ia32_sqrtph512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph512
+// CHECK: cir.sqrt
\ No newline at end of file

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][X86] Implement lowering for sqrt builtins (PR #169310)

Reply via email to