github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff origin/main HEAD --extensions h,cpp -- \
  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.h \
  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp \
  llvm/lib/Target/ARM/ARMISelLowering.h llvm/lib/Target/ARM/ARMTargetMachine.cpp \
  --diff_from_common_commit
``````````
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
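For example, in a stacked-PR workflow you could compare against the parent branch of your stack instead of `origin/main`. This is only a sketch: `my-parent-branch` is a placeholder for whatever base branch or commit your PR is actually stacked on.

``````````bash
# "my-parent-branch" is a placeholder; substitute the base branch or commit of your stack.
git-clang-format --diff my-parent-branch HEAD --extensions h,cpp -- \
  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.h \
  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp \
  llvm/lib/Target/ARM/ARMISelLowering.h llvm/lib/Target/ARM/ARMTargetMachine.cpp \
  --diff_from_common_commit
``````````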
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 6e2aaa9fc..6d8a3b722 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1552,8 +1552,9 @@ bool ARMBaseInstrInfo::expandCtSelectVector(MachineInstr &MI) const {
unsigned RsbOp = Subtarget.isThumb2() ? ARM::t2RSBri : ARM::RSBri;
- // Any vector pseudo has: ((outs $dst, $tmp_mask, $bcast_mask), (ins $src1, $src2, $cond))
- Register VectorMaskReg = MI.getOperand(2).getReg();
+ // Any vector pseudo has: ((outs $dst, $tmp_mask, $bcast_mask), (ins $src1,
+ // $src2, $cond))
+ Register VectorMaskReg = MI.getOperand(2).getReg();
Register Src1Reg = MI.getOperand(3).getReg();
Register Src2Reg = MI.getOperand(4).getReg();
Register CondReg = MI.getOperand(5).getReg();
@@ -1564,47 +1565,46 @@ bool ARMBaseInstrInfo::expandCtSelectVector(MachineInstr &MI) const {
// When cond = 0: mask = 0x00000000.
// When cond = 1: mask = 0xFFFFFFFF.
- MachineInstr *FirstNewMI =
- BuildMI(*MBB, MI, DL, get(RsbOp), MaskReg)
- .addReg(CondReg)
- .addImm(0)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp())
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
-
+ MachineInstr *FirstNewMI = BuildMI(*MBB, MI, DL, get(RsbOp), MaskReg)
+ .addReg(CondReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
// 2. A = src1 & mask
// For vectors, broadcast the scalar mask so it matches operand size.
BuildMI(*MBB, MI, DL, get(BroadcastOp), VectorMaskReg)
- .addReg(MaskReg)
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(MaskReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
BuildMI(*MBB, MI, DL, get(AndOp), DestReg)
- .addReg(Src1Reg)
- .addReg(VectorMaskReg)
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(Src1Reg)
+ .addReg(VectorMaskReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// 3. B = src2 & ~mask
BuildMI(*MBB, MI, DL, get(BicOp), VectorMaskReg)
- .addReg(Src2Reg)
- .addReg(VectorMaskReg)
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(Src2Reg)
+ .addReg(VectorMaskReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// 4. result = A | B
auto LastNewMI = BuildMI(*MBB, MI, DL, get(OrrOp), DestReg)
- .addReg(DestReg)
- .addReg(VectorMaskReg)
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(DestReg)
+ .addReg(VectorMaskReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
auto BundleStart = FirstNewMI->getIterator();
auto BundleEnd = LastNewMI->getIterator();
// Add instruction bundling
finalizeBundle(*MBB, BundleStart, std::next(BundleEnd));
-
+
MI.eraseFromParent();
return true;
}
@@ -1614,8 +1614,8 @@ bool ARMBaseInstrInfo::expandCtSelectThumb(MachineInstr &MI) const {
MachineBasicBlock *MBB = MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- // pseudos in thumb1 mode have: (outs $dst, $tmp_mask), (ins $src1, $src2, $cond))
- // register class here is always tGPR.
+ // pseudos in thumb1 mode have: (outs $dst, $tmp_mask), (ins $src1, $src2,
+ // $cond)) register class here is always tGPR.
Register DestReg = MI.getOperand(0).getReg();
Register MaskReg = MI.getOperand(1).getReg();
Register Src1Reg = MI.getOperand(2).getReg();
@@ -1631,60 +1631,64 @@ bool ARMBaseInstrInfo::expandCtSelectThumb(MachineInstr &MI) const {
unsigned ShiftAmount = RegSize - 1;
// Option 1: Shift-based mask (preferred - no flag modification)
- MachineInstr *FirstNewMI =
- BuildMI(*MBB, MI, DL, get(ARM::tMOVr), MaskReg)
- .addReg(CondReg)
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
-
- // Instead of using RSB, we can use LSL and ASR to get the mask. This is to avoid the flag modification caused by RSB.
- // tLSLri: (outs tGPR:$Rd, s_cc_out:$s), (ins tGPR:$Rm, imm0_31:$imm5, pred:$p)
+ MachineInstr *FirstNewMI = BuildMI(*MBB, MI, DL, get(ARM::tMOVr), MaskReg)
+ .addReg(CondReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // Instead of using RSB, we can use LSL and ASR to get the mask. This is to
+ // avoid the flag modification caused by RSB. tLSLri: (outs tGPR:$Rd,
+ // s_cc_out:$s), (ins tGPR:$Rm, imm0_31:$imm5, pred:$p)
BuildMI(*MBB, MI, DL, get(ARM::tLSLri), MaskReg)
- .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
- .addReg(MaskReg) // $Rm
- .addImm(ShiftAmount) // imm0_31:$imm5
- .add(predOps(ARMCC::AL)) // pred:$p
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(MaskReg) // $Rm
+ .addImm(ShiftAmount) // imm0_31:$imm5
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// tASRri: (outs tGPR:$Rd, s_cc_out:$s), (ins tGPR:$Rm, imm_sr:$imm5, pred:$p)
BuildMI(*MBB, MI, DL, get(ARM::tASRri), MaskReg)
- .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
- .addReg(MaskReg) // $Rm
- .addImm(ShiftAmount) // imm_sr:$imm5
- .add(predOps(ARMCC::AL)) // pred:$p
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(MaskReg) // $Rm
+ .addImm(ShiftAmount) // imm_sr:$imm5
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// 2. xor_diff = src1 ^ src2
BuildMI(*MBB, MI, DL, get(ARM::tMOVr), DestReg)
- .addReg(Src1Reg)
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(Src1Reg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
- // tEOR has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn, pred:$p) with constraint "$Rn = $Rdn"
+ // tEOR has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn,
+ // pred:$p) with constraint "$Rn = $Rdn"
BuildMI(*MBB, MI, DL, get(ARM::tEOR), DestReg)
- .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
- .addReg(DestReg) // tied input $Rn
- .addReg(Src2Reg) // $Rm
- .add(predOps(ARMCC::AL)) // pred:$p
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(DestReg) // tied input $Rn
+ .addReg(Src2Reg) // $Rm
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// 3. masked_xor = xor_diff & mask
- // tAND has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn, pred:$p) with constraint "$Rn = $Rdn"
+ // tAND has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn,
+ // pred:$p) with constraint "$Rn = $Rdn"
BuildMI(*MBB, MI, DL, get(ARM::tAND), DestReg)
- .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
- .addReg(DestReg) // tied input $Rn
- .addReg(MaskReg, RegState::Kill) // $Rm
- .add(predOps(ARMCC::AL)) // pred:$p
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(DestReg) // tied input $Rn
+ .addReg(MaskReg, RegState::Kill) // $Rm
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// 4. result = src2 ^ masked_xor
- // tEOR has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn, pred:$p) with constraint "$Rn = $Rdn"
- auto LastMI = BuildMI(*MBB, MI, DL, get(ARM::tEOR), DestReg)
- .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
- .addReg(DestReg) // tied input $Rn
- .addReg(Src2Reg) // $Rm
- .add(predOps(ARMCC::AL)) // pred:$p
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ // tEOR has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn,
+ // pred:$p) with constraint "$Rn = $Rdn"
+ auto LastMI =
+ BuildMI(*MBB, MI, DL, get(ARM::tEOR), DestReg)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(DestReg) // tied input $Rn
+ .addReg(Src2Reg) // $Rm
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// Add instruction bundling
auto BundleStart = FirstNewMI->getIterator();
@@ -1718,39 +1722,42 @@ bool ARMBaseInstrInfo::expandCtSelect(MachineInstr &MI) const {
}
unsigned Opcode = MI.getOpcode();
- bool IsFloat = Opcode == ARM::CTSELECTf32 || Opcode == ARM::CTSELECTf16 || Opcode == ARM::CTSELECTbf16;
+ bool IsFloat = Opcode == ARM::CTSELECTf32 || Opcode == ARM::CTSELECTf16 ||
+ Opcode == ARM::CTSELECTbf16;
MachineInstr *FirstNewMI = nullptr;
if (IsFloat) {
- // Each float pseudo has: (outs $dst, $tmp_mask, $scratch1, $scratch2), (ins $src1, $src2, $cond))
- // We use two scratch registers in tablegen for bitwise ops on float types,.
- Register GPRScratch1 = MI.getOperand(2).getReg();
- Register GPRScratch2 = MI.getOperand(3).getReg();
-
- // choice a from __builtin_ct_select(cond, a, b)
- Src1Reg = MI.getOperand(4).getReg();
- // choice b from __builtin_ct_select(cond, a, b)
- Src2Reg = MI.getOperand(5).getReg();
- // cond from __builtin_ct_select(cond, a, b)
- CondReg = MI.getOperand(6).getReg();
+ // Each float pseudo has: (outs $dst, $tmp_mask, $scratch1, $scratch2), (ins
+ // $src1, $src2, $cond)) We use two scratch registers in tablegen for
+ // bitwise ops on float types,.
+ Register GPRScratch1 = MI.getOperand(2).getReg();
+ Register GPRScratch2 = MI.getOperand(3).getReg();
+
+ // choice a from __builtin_ct_select(cond, a, b)
+ Src1Reg = MI.getOperand(4).getReg();
+ // choice b from __builtin_ct_select(cond, a, b)
+ Src2Reg = MI.getOperand(5).getReg();
+ // cond from __builtin_ct_select(cond, a, b)
+ CondReg = MI.getOperand(6).getReg();
// Move fp src1 to GPR scratch1 so we can do our bitwise ops
FirstNewMI = BuildMI(*MBB, MI, DL, get(ARM::VMOVRS), GPRScratch1)
- .addReg(Src1Reg)
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
-
+ .addReg(Src1Reg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
// Move src2 to scratch2
BuildMI(*MBB, MI, DL, get(ARM::VMOVRS), GPRScratch2)
- .addReg(Src2Reg)
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
-
+ .addReg(Src2Reg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
Src1Reg = GPRScratch1;
Src2Reg = GPRScratch2;
// Reuse GPRScratch1 for dest after we are done working with src1.
DestReg = GPRScratch1;
} else {
- // Any non-float, non-vector pseudo has: (outs $dst, $tmp_mask), (ins $src1, $src2, $cond))
+ // Any non-float, non-vector pseudo has: (outs $dst, $tmp_mask), (ins $src1,
+ // $src2, $cond))
Src1Reg = MI.getOperand(2).getReg();
Src2Reg = MI.getOperand(3).getReg();
CondReg = MI.getOperand(4).getReg();
@@ -1762,11 +1769,11 @@ bool ARMBaseInstrInfo::expandCtSelect(MachineInstr &MI) const {
// When cond = 0: mask = 0x00000000.
// When cond = 1: mask = 0xFFFFFFFF.
auto TmpNewMI = BuildMI(*MBB, MI, DL, get(RsbOp), MaskReg)
- .addReg(CondReg)
- .addImm(0)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp())
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(CondReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// We use the first instruction in the bundle as the first instruction.
if (!FirstNewMI)
@@ -1774,34 +1781,34 @@ bool ARMBaseInstrInfo::expandCtSelect(MachineInstr &MI) const {
// 2. A = src1 & mask
BuildMI(*MBB, MI, DL, get(AndOp), DestReg)
- .addReg(Src1Reg)
- .addReg(MaskReg)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp())
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(Src1Reg)
+ .addReg(MaskReg)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// 3. B = src2 & ~mask
BuildMI(*MBB, MI, DL, get(BicOp), MaskReg)
- .addReg(Src2Reg)
- .addReg(MaskReg)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp())
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(Src2Reg)
+ .addReg(MaskReg)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// 4. result = A | B
auto LastNewMI = BuildMI(*MBB, MI, DL, get(OrrOp), DestReg)
- .addReg(DestReg)
- .addReg(MaskReg)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp())
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(DestReg)
+ .addReg(MaskReg)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
if (IsFloat) {
// Return our result from GPR to the correct register type.
- LastNewMI =BuildMI(*MBB, MI, DL, get(ARM::VMOVSR), DestRegSavedRef)
- .addReg(DestReg)
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ LastNewMI = BuildMI(*MBB, MI, DL, get(ARM::VMOVSR), DestRegSavedRef)
+ .addReg(DestReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
}
auto BundleStart = FirstNewMI->getIterator();
@@ -1809,7 +1816,7 @@ bool ARMBaseInstrInfo::expandCtSelect(MachineInstr &MI) const {
// Add instruction bundling
finalizeBundle(*MBB, BundleStart, std::next(BundleEnd));
-
+
MI.eraseFromParent();
return true;
}
@@ -1830,39 +1837,33 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (opcode == ARM::CTSELECTf64) {
if (Subtarget.isThumb1Only()) {
- LLVM_DEBUG(dbgs() << "Opcode (thumb1 subtarget) " << opcode << "replaced by: " << MI);
+ LLVM_DEBUG(dbgs() << "Opcode (thumb1 subtarget) " << opcode
+ << "replaced by: " << MI);
return expandCtSelectThumb(MI);
} else {
- LLVM_DEBUG(dbgs() << "Opcode (vector) " << opcode << "replaced by: " << MI);
+ LLVM_DEBUG(dbgs() << "Opcode (vector) " << opcode
+ << "replaced by: " << MI);
return expandCtSelectVector(MI);
}
}
- if (opcode == ARM::CTSELECTv8i8 ||
- opcode == ARM::CTSELECTv4i16 ||
- opcode == ARM::CTSELECTv2i32 ||
- opcode == ARM::CTSELECTv1i64 ||
- opcode == ARM::CTSELECTv2f32 ||
- opcode == ARM::CTSELECTv4f16 ||
- opcode == ARM::CTSELECTv4bf16 ||
- opcode == ARM::CTSELECTv16i8 ||
- opcode == ARM::CTSELECTv8i16 ||
- opcode == ARM::CTSELECTv4i32 ||
- opcode == ARM::CTSELECTv2i64 ||
- opcode == ARM::CTSELECTv4f32 ||
- opcode == ARM::CTSELECTv2f64 ||
- opcode == ARM::CTSELECTv8f16 ||
+ if (opcode == ARM::CTSELECTv8i8 || opcode == ARM::CTSELECTv4i16 ||
+ opcode == ARM::CTSELECTv2i32 || opcode == ARM::CTSELECTv1i64 ||
+ opcode == ARM::CTSELECTv2f32 || opcode == ARM::CTSELECTv4f16 ||
+ opcode == ARM::CTSELECTv4bf16 || opcode == ARM::CTSELECTv16i8 ||
+ opcode == ARM::CTSELECTv8i16 || opcode == ARM::CTSELECTv4i32 ||
+ opcode == ARM::CTSELECTv2i64 || opcode == ARM::CTSELECTv4f32 ||
+ opcode == ARM::CTSELECTv2f64 || opcode == ARM::CTSELECTv8f16 ||
opcode == ARM::CTSELECTv8bf16) {
LLVM_DEBUG(dbgs() << "Opcode (vector) " << opcode << "replaced by: " << MI);
return expandCtSelectVector(MI);
}
- if (opcode == ARM::CTSELECTint ||
- opcode == ARM::CTSELECTf16 ||
- opcode == ARM::CTSELECTbf16 ||
- opcode == ARM::CTSELECTf32) {
+ if (opcode == ARM::CTSELECTint || opcode == ARM::CTSELECTf16 ||
+ opcode == ARM::CTSELECTbf16 || opcode == ARM::CTSELECTf32) {
if (Subtarget.isThumb1Only()) {
- LLVM_DEBUG(dbgs() << "Opcode (thumb1 subtarget) " << opcode << "replaced by: " << MI);
+ LLVM_DEBUG(dbgs() << "Opcode (thumb1 subtarget) " << opcode
+ << "replaced by: " << MI);
return expandCtSelectThumb(MI);
} else {
LLVM_DEBUG(dbgs() << "Opcode " << opcode << "replaced by: " << MI);
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 62f5b21a7..3fdc5734b 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -4203,7 +4203,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::CTSELECT: {
EVT VT = N->getValueType(0);
unsigned PseudoOpcode;
- bool IsFloat = false;
+ bool IsFloat = false;
bool IsVector = false;
if (VT == MVT::f16) {
@@ -4269,18 +4269,18 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
SmallVector<EVT, 4> VTs;
- VTs.push_back(VT); // $dst
- VTs.push_back(MVT::i32); // $tmp_mask (always GPR)
-
+ VTs.push_back(VT); // $dst
+ VTs.push_back(MVT::i32); // $tmp_mask (always GPR)
+
if (IsVector) {
- VTs.push_back(VT); // $bcast_mask (same type as dst for vectors)
+ VTs.push_back(VT); // $bcast_mask (same type as dst for vectors)
} else if (IsFloat) {
- VTs.push_back(MVT::i32); // $scratch1 (GPR)
- VTs.push_back(MVT::i32); // $scratch2 (GPR)
+ VTs.push_back(MVT::i32); // $scratch1 (GPR)
+ VTs.push_back(MVT::i32); // $scratch2 (GPR)
}
-
+
// src1, src2, cond
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2)};
SDNode *ResNode = CurDAG->getMachineNode(PseudoOpcode, SDLoc(N), VTs, Ops);
ReplaceNode(N, ResNode);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c5d7f8048..63005f1c9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -203,7 +203,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::CTSELECT, VT, Custom);
+ setOperationAction(ISD::CTSELECT, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT, Custom);
@@ -422,12 +422,12 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
}
// small exotic vectors get scalarised for ctselect
- setOperationAction(ISD::CTSELECT, MVT::v1i8, Expand);
+ setOperationAction(ISD::CTSELECT, MVT::v1i8, Expand);
setOperationAction(ISD::CTSELECT, MVT::v1i16, Expand);
setOperationAction(ISD::CTSELECT, MVT::v1i32, Expand);
setOperationAction(ISD::CTSELECT, MVT::v1f32, Expand);
- setOperationAction(ISD::CTSELECT, MVT::v2i8, Expand);
-
+ setOperationAction(ISD::CTSELECT, MVT::v2i8, Expand);
+
setOperationAction(ISD::CTSELECT, MVT::v2i16, Promote);
setOperationPromotedToType(ISD::CTSELECT, MVT::v2i16, MVT::v4i16);
setOperationAction(ISD::CTSELECT, MVT::v4i8, Promote);
@@ -1274,15 +1274,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
- setOperationAction(ISD::CTSELECT, MVT::i8, Promote);
- setOperationAction(ISD::CTSELECT, MVT::i16, Promote);
+ setOperationAction(ISD::CTSELECT, MVT::i8, Promote);
+ setOperationAction(ISD::CTSELECT, MVT::i16, Promote);
setOperationPromotedToType(ISD::CTSELECT, MVT::i16, MVT::i32);
- setOperationAction(ISD::CTSELECT, MVT::i32, Custom);
- setOperationAction(ISD::CTSELECT, MVT::i64, Expand);
- setOperationAction(ISD::CTSELECT, MVT::f32, Custom);
- setOperationAction(ISD::CTSELECT, MVT::f64, Custom);
-
+ setOperationAction(ISD::CTSELECT, MVT::i32, Custom);
+ setOperationAction(ISD::CTSELECT, MVT::i64, Expand);
+ setOperationAction(ISD::CTSELECT, MVT::f32, Custom);
+ setOperationAction(ISD::CTSELECT, MVT::f64, Custom);
+
// Handle f16 and bf16 without falling back to select from ctselect.
setTargetDAGCombine({ISD::CTSELECT});
@@ -1290,7 +1290,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::SETCC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
- setOperationAction(ISD::CTSELECT, MVT::f16, Custom);
+ setOperationAction(ISD::CTSELECT, MVT::f16, Custom);
}
if (Subtarget->hasBF16()) {
@@ -10687,7 +10687,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::CTSELECT: return LowerCTSELECT(Op, DAG);
+ case ISD::CTSELECT:
+ return LowerCTSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
@@ -10918,35 +10919,35 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
break;
case ISD::CTSELECT: {
- EVT VT = N->getValueType(0);
-
- // Handle f16/bf16 type promotion while preserving ctselect
- if (VT == MVT::f16 || VT == MVT::bf16) {
- SDLoc DL(N);
- SDValue Cond = N->getOperand(0);
- SDValue TrueVal = N->getOperand(1);
- SDValue FalseVal = N->getOperand(2);
-
- // Bitcast to i16, then promote to i32
- SDValue TrueInt = DAG.getBitcast(MVT::i16, TrueVal);
- SDValue FalseInt = DAG.getBitcast(MVT::i16, FalseVal);
-
- TrueInt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, TrueInt);
- FalseInt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, FalseInt);
-
- // Normalize condition
- SDValue One = DAG.getConstant(1, DL, MVT::i32);
- SDValue CondNorm = DAG.getNode(ISD::AND, DL, MVT::i32, Cond, One);
-
- // Create i32 ctselect that will go through normal lowering
- Res = DAG.getNode(ISD::CTSELECT, DL, MVT::i32,
- CondNorm, TrueInt, FalseInt);
- } else {
- // For other types, use existing lowering
- Res = LowerCTSELECT(SDValue(N, 0), DAG);
- }
- break;
+ EVT VT = N->getValueType(0);
+
+ // Handle f16/bf16 type promotion while preserving ctselect
+ if (VT == MVT::f16 || VT == MVT::bf16) {
+ SDLoc DL(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue FalseVal = N->getOperand(2);
+
+ // Bitcast to i16, then promote to i32
+ SDValue TrueInt = DAG.getBitcast(MVT::i16, TrueVal);
+ SDValue FalseInt = DAG.getBitcast(MVT::i16, FalseVal);
+
+ TrueInt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, TrueInt);
+ FalseInt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, FalseInt);
+
+ // Normalize condition
+ SDValue One = DAG.getConstant(1, DL, MVT::i32);
+ SDValue CondNorm = DAG.getNode(ISD::AND, DL, MVT::i32, Cond, One);
+
+ // Create i32 ctselect that will go through normal lowering
+ Res =
+ DAG.getNode(ISD::CTSELECT, DL, MVT::i32, CondNorm, TrueInt, FalseInt);
+ } else {
+ // For other types, use existing lowering
+ Res = LowerCTSELECT(SDValue(N, 0), DAG);
}
+ break;
+ }
}
if (Res.getNode())
Results.push_back(Res);
@@ -13568,28 +13569,29 @@ static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
}
-static SDValue PerformCTSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
+static SDValue PerformCTSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
if (!DCI.isBeforeLegalize()) {
- return SDValue();
+ return SDValue();
}
-
+
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
-
+
EVT VT = N->getValueType(0);
if (VT == MVT::f16 || VT == MVT::bf16) {
SDValue Cond = N->getOperand(0);
SDValue TrueVal = N->getOperand(1);
SDValue FalseVal = N->getOperand(2);
-
+
SDValue TrueInt = DAG.getBitcast(MVT::i16, TrueVal);
SDValue FalseInt = DAG.getBitcast(MVT::i16, FalseVal);
-
+
// Create i16 ctselect - this will be promoted to i32 ctselect naturally
- SDValue Result = DAG.getNode(ISD::CTSELECT, DL, MVT::i16,
- Cond, TrueInt, FalseInt);
-
+ SDValue Result =
+ DAG.getNode(ISD::CTSELECT, DL, MVT::i16, Cond, TrueInt, FalseInt);
+
return DAG.getBitcast(VT, Result);
} else if (VT.isVector()) {
EVT EltVT = VT.getVectorElementType();
@@ -13597,7 +13599,7 @@ static SDValue PerformCTSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo
SDValue Cond = N->getOperand(0);
SDValue TrueVal = N->getOperand(1);
SDValue FalseVal = N->getOperand(2);
-
+
EVT IntVT;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::v4f16:
@@ -13611,13 +13613,13 @@ static SDValue PerformCTSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo
default:
return SDValue(); // Unsupported vector type
}
-
+
SDValue TrueInt = DAG.getBitcast(IntVT, TrueVal);
SDValue FalseInt = DAG.getBitcast(IntVT, FalseVal);
-
- SDValue Result = DAG.getNode(ISD::CTSELECT, DL, IntVT,
- Cond, TrueInt, FalseInt);
-
+
+ SDValue Result =
+ DAG.getNode(ISD::CTSELECT, DL, IntVT, Cond, TrueInt, FalseInt);
+
return DAG.getBitcast(VT, Result);
}
}
@@ -19128,7 +19130,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT_CC:
case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
- case ISD::CTSELECT: return PerformCTSELECTCombine(N, DCI, Subtarget);
+ case ISD::CTSELECT:
+ return PerformCTSELECTCombine(N, DCI, Subtarget);
case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
``````````
</details>
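If the diff above looks right, one possible way to apply it locally is to save it to a file and apply it with `git apply`. This is only a sketch: `clang-format.patch` is a placeholder file name, and the command assumes it is run from the repository root.

``````````bash
# "clang-format.patch" is a placeholder: a file containing the diff shown above.
# Run from the repository root so the a/ and b/ paths resolve correctly.
git apply clang-format.patch
``````````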
https://github.com/llvm/llvm-project/pull/166707