github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86ISelLowering.h llvm/lib/Target/X86/X86InstrInfo.cpp llvm/lib/Target/X86/X86InstrInfo.h llvm/lib/Target/X86/X86TargetMachine.cpp --diff_from_common_commit
``````````
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
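For instance, in a stacked workflow you could compare against the parent PR's branch instead. A minimal sketch (the branch name `users/me/parent-pr` is hypothetical; substitute your actual base branch or commit):

``````````bash
# Hypothetical stacked-PR base; replace with the branch/commit your PR is based on.
git-clang-format --diff users/me/parent-pr HEAD --extensions h,cpp -- \
  llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86ISelLowering.h \
  llvm/lib/Target/X86/X86InstrInfo.cpp llvm/lib/Target/X86/X86InstrInfo.h \
  llvm/lib/Target/X86/X86TargetMachine.cpp
``````````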
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4c73f7402..3afdb884b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29,9 +29,9 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -25372,7 +25372,7 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, const SDLoc &dl,
}
SDValue X86TargetLowering::LowerCTSELECT(SDValue Op, SelectionDAG &DAG) const {
- SDValue Cond = Op.getOperand(0); // condition
+ SDValue Cond = Op.getOperand(0); // condition
SDValue TrueOp = Op.getOperand(1); // true_value
SDValue FalseOp = Op.getOperand(2); // false_value
SDLoc DL(Op);
@@ -25542,29 +25542,33 @@ SDValue X86TargetLowering::LowerCTSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue FalseSlot = DAG.CreateStackTemporary(MVT::f80);
// Store f80 values to memory
- SDValue StoreTrueF80 = DAG.getStore(Chain, DL, TrueOp, TrueSlot,
- MachinePointerInfo());
- SDValue StoreFalseF80 = DAG.getStore(Chain, DL, FalseOp, FalseSlot,
- MachinePointerInfo());
+ SDValue StoreTrueF80 =
+ DAG.getStore(Chain, DL, TrueOp, TrueSlot, MachinePointerInfo());
+ SDValue StoreFalseF80 =
+ DAG.getStore(Chain, DL, FalseOp, FalseSlot, MachinePointerInfo());
// Load i32 parts from memory (3 chunks for 96-bit f80 storage)
- SDValue TruePart0 = DAG.getLoad(MVT::i32, DL, StoreTrueF80, TrueSlot,
- MachinePointerInfo());
- SDValue TruePart1Ptr = DAG.getMemBasePlusOffset(TrueSlot, TypeSize::getFixed(4), DL);
+ SDValue TruePart0 =
+ DAG.getLoad(MVT::i32, DL, StoreTrueF80, TrueSlot, MachinePointerInfo());
+ SDValue TruePart1Ptr =
+ DAG.getMemBasePlusOffset(TrueSlot, TypeSize::getFixed(4), DL);
SDValue TruePart1 = DAG.getLoad(MVT::i32, DL, StoreTrueF80, TruePart1Ptr,
- MachinePointerInfo());
- SDValue TruePart2Ptr = DAG.getMemBasePlusOffset(TrueSlot, TypeSize::getFixed(8), DL);
+ MachinePointerInfo());
+ SDValue TruePart2Ptr =
+ DAG.getMemBasePlusOffset(TrueSlot, TypeSize::getFixed(8), DL);
SDValue TruePart2 = DAG.getLoad(MVT::i32, DL, StoreTrueF80, TruePart2Ptr,
- MachinePointerInfo());
+ MachinePointerInfo());
SDValue FalsePart0 = DAG.getLoad(MVT::i32, DL, StoreFalseF80, FalseSlot,
- MachinePointerInfo());
- SDValue FalsePart1Ptr = DAG.getMemBasePlusOffset(FalseSlot, TypeSize::getFixed(4), DL);
+ MachinePointerInfo());
+ SDValue FalsePart1Ptr =
+ DAG.getMemBasePlusOffset(FalseSlot, TypeSize::getFixed(4), DL);
SDValue FalsePart1 = DAG.getLoad(MVT::i32, DL, StoreFalseF80, FalsePart1Ptr,
- MachinePointerInfo());
- SDValue FalsePart2Ptr = DAG.getMemBasePlusOffset(FalseSlot, TypeSize::getFixed(8), DL);
+ MachinePointerInfo());
+ SDValue FalsePart2Ptr =
+ DAG.getMemBasePlusOffset(FalseSlot, TypeSize::getFixed(8), DL);
SDValue FalsePart2 = DAG.getLoad(MVT::i32, DL, StoreFalseF80, FalsePart2Ptr,
- MachinePointerInfo());
+ MachinePointerInfo());
// Perform CTSELECT on each 32-bit chunk
SDValue Part0Ops[] = {FalsePart0, TruePart0, CC, ProcessedCond};
@@ -25576,17 +25580,20 @@ SDValue X86TargetLowering::LowerCTSELECT(SDValue Op, SelectionDAG &DAG) const {
// Create result stack slot and store the selected parts
SDValue ResultSlot = DAG.CreateStackTemporary(MVT::f80);
- SDValue StorePart0 = DAG.getStore(Chain, DL, Part0Select, ResultSlot,
- MachinePointerInfo());
- SDValue ResPart1Ptr = DAG.getMemBasePlusOffset(ResultSlot, TypeSize::getFixed(4), DL);
+ SDValue StorePart0 =
+ DAG.getStore(Chain, DL, Part0Select, ResultSlot, MachinePointerInfo());
+ SDValue ResPart1Ptr =
+ DAG.getMemBasePlusOffset(ResultSlot, TypeSize::getFixed(4), DL);
SDValue StorePart1 = DAG.getStore(StorePart0, DL, Part1Select, ResPart1Ptr,
MachinePointerInfo());
- SDValue ResPart2Ptr = DAG.getMemBasePlusOffset(ResultSlot, TypeSize::getFixed(8), DL);
+ SDValue ResPart2Ptr =
+ DAG.getMemBasePlusOffset(ResultSlot, TypeSize::getFixed(8), DL);
SDValue StorePart2 = DAG.getStore(StorePart1, DL, Part2Select, ResPart2Ptr,
MachinePointerInfo());
// Load complete f80 result from memory
- return DAG.getLoad(MVT::f80, DL, StorePart2, ResultSlot, MachinePointerInfo());
+ return DAG.getLoad(MVT::f80, DL, StorePart2, ResultSlot,
+ MachinePointerInfo());
}
// Create final CTSELECT node
@@ -38000,10 +38007,8 @@ X86TargetLowering::emitPatchableEventCall(MachineInstr &MI,
/// This approach ensures that when i64 is type-legalized into two i32
/// operations, both operations share the same condition byte rather than
/// each independently reading (and destroying) EFLAGS.
-static MachineBasicBlock *
-emitCTSelectI386WithConditionMaterialization(MachineInstr &MI,
- MachineBasicBlock *BB,
- unsigned InternalPseudoOpcode) {
+static MachineBasicBlock *emitCTSelectI386WithConditionMaterialization(
+ MachineInstr &MI, MachineBasicBlock *BB, unsigned InternalPseudoOpcode) {
const TargetInstrInfo *TII = BB->getParent()->getSubtarget().getInstrInfo();
const MIMetadata MIMD(MI);
MachineFunction *MF = BB->getParent();
@@ -38047,12 +38052,12 @@ emitCTSelectI386WithConditionMaterialization(MachineInstr &MI,
}
BuildMI(*BB, MI, MIMD, TII->get(InternalPseudoOpcode))
- .addDef(DstReg) // dst (output)
- .addDef(TmpByteReg) // tmp_byte (output)
- .addDef(TmpMaskReg) // tmp_mask (output)
- .addReg(Src1Reg) // src1 (input)
- .addReg(Src2Reg) // src2 (input)
- .addReg(CondByteReg); // pre-materialized condition byte (input)
+ .addDef(DstReg) // dst (output)
+ .addDef(TmpByteReg) // tmp_byte (output)
+ .addDef(TmpMaskReg) // tmp_mask (output)
+ .addReg(Src1Reg) // src1 (input)
+ .addReg(Src2Reg) // src2 (input)
+ .addReg(CondByteReg); // pre-materialized condition byte (input)
MI.eraseFromParent();
return BB;
@@ -38078,8 +38083,8 @@ struct FPLoadMemOperands {
// Check if a virtual register is defined by a simple FP load instruction
// Returns the memory operands if it's a simple load, otherwise returns invalid
static FPLoadMemOperands getFPLoadMemOperands(Register Reg,
- MachineRegisterInfo &MRI,
- unsigned ExpectedLoadOpcode) {
+ MachineRegisterInfo &MRI,
+ unsigned ExpectedLoadOpcode) {
FPLoadMemOperands Result;
if (!Reg.isVirtual())
@@ -38098,9 +38103,9 @@ static FPLoadMemOperands getFPLoadMemOperands(Register Reg,
if (DefMI->hasOrderedMemoryRef())
return Result;
- // The load should have a single def (the destination register) and memory operands
- // Format: %reg = LD_Fpxxm <fi#N>, 1, %noreg, 0, %noreg
- // or: %reg = LD_Fpxxm %base, scale, %index, disp, %segment
+ // The load should have a single def (the destination register) and memory
+ // operands Format: %reg = LD_Fpxxm <fi#N>, 1, %noreg, 0, %noreg or: %reg =
+ // LD_Fpxxm %base, scale, %index, disp, %segment
if (DefMI->getNumOperands() < 6)
return Result;
@@ -38125,9 +38130,8 @@ static FPLoadMemOperands getFPLoadMemOperands(Register Reg,
// Check if this is a constant pool load
// Format: %reg = LD_Fpxxm $noreg, 1, $noreg, %const.N, $noreg
- if (BaseMO.isReg() && BaseMO.getReg() == X86::NoRegister &&
- ScaleMO.isImm() && IndexMO.isReg() &&
- IndexMO.getReg() == X86::NoRegister &&
+ if (BaseMO.isReg() && BaseMO.getReg() == X86::NoRegister && ScaleMO.isImm() &&
+ IndexMO.isReg() && IndexMO.getReg() == X86::NoRegister &&
DispMO.isCPI() && SegMO.isReg()) {
Result.IsValid = true;
Result.IsConstantPool = true;
@@ -38141,9 +38145,8 @@ static FPLoadMemOperands getFPLoadMemOperands(Register Reg,
// Check if this is a global variable load
// Format: %reg = LD_Fpxxm $noreg, 1, $noreg, @global_name, $noreg
- if (BaseMO.isReg() && BaseMO.getReg() == X86::NoRegister &&
- ScaleMO.isImm() && IndexMO.isReg() &&
- IndexMO.getReg() == X86::NoRegister &&
+ if (BaseMO.isReg() && BaseMO.getReg() == X86::NoRegister && ScaleMO.isImm() &&
+ IndexMO.isReg() && IndexMO.getReg() == X86::NoRegister &&
DispMO.isGlobal() && SegMO.isReg()) {
Result.IsValid = true;
Result.IsGlobal = true;
@@ -38157,8 +38160,8 @@ static FPLoadMemOperands getFPLoadMemOperands(Register Reg,
}
// Regular memory operands (e.g., pointer loads)
- if (BaseMO.isReg() && ScaleMO.isImm() && IndexMO.isReg() &&
- DispMO.isImm() && SegMO.isReg()) {
+ if (BaseMO.isReg() && ScaleMO.isImm() && IndexMO.isReg() && DispMO.isImm() &&
+ SegMO.isReg()) {
Result.IsValid = true;
Result.IsFrameIndex = false;
Result.IsConstantPool = false;
@@ -38184,7 +38187,8 @@ static MachineBasicBlock *emitCTSelectI386WithFpType(MachineInstr &MI,
unsigned RegSizeInByte = 4;
// Get operands
- // MI operands: %result:rfp80 = CTSELECT_I386 %false:rfp80, %true:rfp80, %cond:i8imm
+ // MI operands: %result:rfp80 = CTSELECT_I386 %false:rfp80, %true:rfp80,
+ // %cond:i8imm
unsigned DestReg = MI.getOperand(0).getReg();
unsigned FalseReg = MI.getOperand(1).getReg();
unsigned TrueReg = MI.getOperand(2).getReg();
@@ -38202,7 +38206,7 @@ static MachineBasicBlock *emitCTSelectI386WithFpType(MachineInstr &MI,
// Helper to load integer from memory operands
auto loadIntFromMemOperands = [&](const FPLoadMemOperands &MemOps,
- unsigned Offset) -> unsigned {
+ unsigned Offset) -> unsigned {
unsigned IntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), IntReg);
@@ -38218,18 +38222,21 @@ static MachineBasicBlock *emitCTSelectI386WithFpType(MachineInstr &MI,
// Constant pool: base_reg + scale + index + CP_index + segment
// MOV32rm format: base, scale, index, displacement, segment
MIB.addReg(X86::NoRegister) // Base register
- .addImm(MemOps.ScaleVal) // Scale
- .addReg(MemOps.IndexReg) // Index register
- .addConstantPoolIndex(MemOps.ConstantPoolIndex, Offset) // Displacement (CP index)
- .addReg(MemOps.SegReg); // Segment
+ .addImm(MemOps.ScaleVal) // Scale
+ .addReg(MemOps.IndexReg) // Index register
+ .addConstantPoolIndex(MemOps.ConstantPoolIndex,
+ Offset) // Displacement (CP index)
+ .addReg(MemOps.SegReg); // Segment
} else if (MemOps.IsGlobal) {
// Global variable: base_reg + scale + index + global + segment
// MOV32rm format: base, scale, index, displacement, segment
MIB.addReg(X86::NoRegister) // Base register
- .addImm(MemOps.ScaleVal) // Scale
- .addReg(MemOps.IndexReg) // Index register
- .addGlobalAddress(MemOps.Global, MemOps.GlobalOffset + Offset) // Displacement (global address)
- .addReg(MemOps.SegReg); // Segment
+ .addImm(MemOps.ScaleVal) // Scale
+ .addReg(MemOps.IndexReg) // Index register
+ .addGlobalAddress(MemOps.Global,
+ MemOps.GlobalOffset +
+ Offset) // Displacement (global address)
+ .addReg(MemOps.SegReg); // Segment
} else {
// Regular memory: base_reg + scale + index + disp + segment
MIB.addReg(MemOps.BaseReg)
@@ -38244,45 +38251,47 @@ static MachineBasicBlock *emitCTSelectI386WithFpType(MachineInstr &MI,
// Optimized path: load integers directly from memory when both operands are
// memory loads, avoiding FP register round-trip
- auto emitCtSelectFromMemory = [&](unsigned NumValues,
- const FPLoadMemOperands &TrueMemOps,
- const FPLoadMemOperands &FalseMemOps,
- int ResultSlot) {
- for (unsigned Val = 0; Val < NumValues; ++Val) {
- unsigned Offset = Val * RegSizeInByte;
-
- // Load true and false values directly from their memory locations as integers
- unsigned TrueIntReg = loadIntFromMemOperands(TrueMemOps, Offset);
- unsigned FalseIntReg = loadIntFromMemOperands(FalseMemOps, Offset);
-
- // Use CTSELECT_I386_INT_GR32 pseudo instruction for constant-time selection
- unsigned ResultIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
- unsigned TmpByteReg = MRI.createVirtualRegister(&X86::GR8RegClass);
- unsigned TmpMaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
-
- BuildMI(*BB, MI, MIMD, TII->get(X86::CTSELECT_I386_INT_GR32rr))
- .addDef(ResultIntReg) // dst (output)
- .addDef(TmpByteReg) // tmp_byte (output)
- .addDef(TmpMaskReg) // tmp_mask (output)
- .addReg(FalseIntReg) // src1 (input) - false value
- .addReg(TrueIntReg) // src2 (input) - true value
- .addReg(CondByteReg); // pre-materialized condition byte (input)
-
- // Store result back to result slot
- BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32mr))
- .addFrameIndex(ResultSlot)
- .addImm(1)
- .addReg(0)
- .addImm(Offset)
- .addReg(0)
- .addReg(ResultIntReg, RegState::Kill);
- }
- };
+ auto emitCtSelectFromMemory =
+ [&](unsigned NumValues, const FPLoadMemOperands &TrueMemOps,
+ const FPLoadMemOperands &FalseMemOps, int ResultSlot) {
+ for (unsigned Val = 0; Val < NumValues; ++Val) {
+ unsigned Offset = Val * RegSizeInByte;
+
+ // Load true and false values directly from their memory locations as
+ // integers
+ unsigned TrueIntReg = loadIntFromMemOperands(TrueMemOps, Offset);
+ unsigned FalseIntReg = loadIntFromMemOperands(FalseMemOps, Offset);
+
+ // Use CTSELECT_I386_INT_GR32 pseudo instruction for constant-time
+ // selection
+ unsigned ResultIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
+ unsigned TmpByteReg = MRI.createVirtualRegister(&X86::GR8RegClass);
+ unsigned TmpMaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
+
+ BuildMI(*BB, MI, MIMD, TII->get(X86::CTSELECT_I386_INT_GR32rr))
+ .addDef(ResultIntReg) // dst (output)
+ .addDef(TmpByteReg) // tmp_byte (output)
+ .addDef(TmpMaskReg) // tmp_mask (output)
+ .addReg(FalseIntReg) // src1 (input) - false value
+ .addReg(TrueIntReg) // src2 (input) - true value
+ .addReg(CondByteReg); // pre-materialized condition byte (input)
+
+ // Store result back to result slot
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32mr))
+ .addFrameIndex(ResultSlot)
+ .addImm(1)
+ .addReg(0)
+ .addImm(Offset)
+ .addReg(0)
+ .addReg(ResultIntReg, RegState::Kill);
+ }
+ };
- auto emitCtSelectWithPseudo = [&](unsigned NumValues, int TrueSlot, int FalseSlot, int ResultSlot) {
+ auto emitCtSelectWithPseudo = [&](unsigned NumValues, int TrueSlot,
+ int FalseSlot, int ResultSlot) {
for (unsigned Val = 0; Val < NumValues; ++Val) {
unsigned Offset = Val * RegSizeInByte;
-
+
// Load true and false values from stack as 32-bit integers
unsigned TrueIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), TrueIntReg)
@@ -38300,18 +38309,19 @@ static MachineBasicBlock *emitCTSelectI386WithFpType(MachineInstr &MI,
.addImm(Offset)
.addReg(0);
- // Use CTSELECT_I386_INT_GR32 pseudo instruction for constant-time selection
+ // Use CTSELECT_I386_INT_GR32 pseudo instruction for constant-time
+ // selection
unsigned ResultIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
unsigned TmpByteReg = MRI.createVirtualRegister(&X86::GR8RegClass);
unsigned TmpMaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
-
+
BuildMI(*BB, MI, MIMD, TII->get(X86::CTSELECT_I386_INT_GR32rr))
- .addDef(ResultIntReg) // dst (output)
- .addDef(TmpByteReg) // tmp_byte (output)
- .addDef(TmpMaskReg) // tmp_mask (output)
- .addReg(FalseIntReg) // src1 (input) - false value
- .addReg(TrueIntReg) // src2 (input) - true value
- .addReg(CondByteReg); // pre-materialized condition byte (input)
+ .addDef(ResultIntReg) // dst (output)
+ .addDef(TmpByteReg) // tmp_byte (output)
+ .addDef(TmpMaskReg) // tmp_mask (output)
+ .addReg(FalseIntReg) // src1 (input) - false value
+ .addReg(TrueIntReg) // src2 (input) - true value
+ .addReg(CondByteReg); // pre-materialized condition byte (input)
// Store result back to result slot
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32mr))
@@ -38539,7 +38549,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitCTSelectI386WithFpType(MI, BB, X86::CTSELECT_I386_FP64rr);
case X86::CTSELECT_I386_FP80rr:
return emitCTSelectI386WithFpType(MI, BB, X86::CTSELECT_I386_FP80rr);
-
+
case X86::FP80_ADDr:
case X86::FP80_ADDm32: {
// Change the floating point control register to use double extended
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 765db86ff..62453fdf0 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -690,8 +690,7 @@ bool X86InstrInfo::expandCtSelectVector(MachineInstr &MI) const {
.addImm(31));
} else {
// Negate to convert 1 -> 0xFFFFFFFF, 0 -> 0x00000000 (negl %eax)
- recordInstr(BuildMI(*MBB, MI, DL, get(X86::NEG32r), TmpGPR)
- .addReg(TmpGPR));
+ recordInstr(BuildMI(*MBB, MI, DL, get(X86::NEG32r), TmpGPR).addReg(TmpGPR));
}
// Broadcast to TmpX (vector mask)
@@ -848,7 +847,8 @@ bool X86InstrInfo::expandCtSelectVector(MachineInstr &MI) const {
.setMIFlags(MachineInstr::MIFlag::NoMerge));
}
- assert(FirstInstr && LastInstr && "Expected at least one expanded instruction");
+ assert(FirstInstr && LastInstr &&
+ "Expected at least one expanded instruction");
auto BundleEnd = LastInstr->getIterator();
finalizeBundle(*MBB, FirstInstr->getIterator(), std::next(BundleEnd));
@@ -916,25 +916,28 @@ bool X86InstrInfo::expandCtSelectWithCMOV(MachineInstr &MI) const {
/// Expand i386-specific CTSELECT pseudo instructions (post-RA, constant-time)
/// These internal pseudos receive a pre-materialized condition byte from the
-/// custom inserter, avoiding EFLAGS corruption issues during i64 type legalization.
+/// custom inserter, avoiding EFLAGS corruption issues during i64 type
+/// legalization.
bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
MachineBasicBlock *MBB = MI.getParent();
DebugLoc DL = MI.getDebugLoc();
// CTSELECT_I386_INT_GRxxrr has operands: (outs dst, tmp_byte, tmp_mask),
// (ins src1, src2, cond_byte)
- // Note: cond_byte is pre-materialized by custom inserter, not EFLAGS-dependent
+ // Note: cond_byte is pre-materialized by custom inserter, not
+ // EFLAGS-dependent
Register DstReg = MI.getOperand(0).getReg();
Register TmpByteReg = MI.getOperand(1).getReg();
Register TmpMaskReg = MI.getOperand(2).getReg();
Register Src1Reg = MI.getOperand(3).getReg();
Register Src2Reg = MI.getOperand(4).getReg();
- Register CondByteReg = MI.getOperand(5).getReg(); // Pre-materialized condition byte
+ Register CondByteReg =
+ MI.getOperand(5).getReg(); // Pre-materialized condition byte
// Determine instruction opcodes based on register width
unsigned MovZXOp, NegOp, MovOp, AndOp, NotOp, OrOp;
if (MI.getOpcode() == X86::CTSELECT_I386_INT_GR8rr) {
- MovZXOp = 0; // No zero-extend needed for GR8
+ MovZXOp = 0; // No zero-extend needed for GR8
NegOp = X86::NEG8r;
MovOp = X86::MOV8rr;
AndOp = X86::AND8rr;
@@ -963,8 +966,8 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
// Step 1: Copy pre-materialized condition byte to TmpByteReg
// This allows the bundle to work with allocated temporaries
auto I1 = BuildMI(*MBB, MI, DL, get(X86::MOV8rr), TmpByteReg)
- .addReg(CondByteReg)
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(CondByteReg)
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
auto BundleStart = I1->getIterator();
// Step 2: Zero-extend condition byte to register width (0 or 1)
@@ -975,7 +978,9 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
}
// Step 3: Convert condition to bitmask (NEG: 1 -> 0xFFFF..., 0 -> 0x0000...)
- Register MaskReg = (MI.getOpcode() == X86::CTSELECT_I386_INT_GR8rr) ? TmpByteReg : TmpMaskReg;
+ Register MaskReg = (MI.getOpcode() == X86::CTSELECT_I386_INT_GR8rr)
+ ? TmpByteReg
+ : TmpMaskReg;
BuildMI(*MBB, MI, DL, get(NegOp), MaskReg)
.addReg(MaskReg)
.setMIFlag(MachineInstr::MIFlag::NoMerge);
@@ -1003,9 +1008,9 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
// Step 8: Final result: (src1 & mask) | (src2 & ~mask)
auto LI = BuildMI(*MBB, MI, DL, get(OrOp), DstReg)
- .addReg(DstReg)
- .addReg(MaskReg)
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(DstReg)
+ .addReg(MaskReg)
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// Bundle all generated instructions for atomic execution before removing MI
auto BundleEnd = std::next(LI->getIterator());
@@ -1014,11 +1019,12 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
finalizeBundle(*MBB, BundleStart, BundleEnd);
}
- // TODO: Optimization opportunity - The register allocator may choose callee-saved
- // registers (e.g., %ebx, %esi) for TmpByteReg/TmpMaskReg, causing unnecessary
- // save/restore overhead. Consider constraining these to caller-saved register
- // classes (e.g., GR8_AL, GR32_CallSaved) in the TableGen definitions to improve
- // constant-time performance by eliminating prologue/epilogue instructions.
+ // TODO: Optimization opportunity - The register allocator may choose
+ // callee-saved registers (e.g., %ebx, %esi) for TmpByteReg/TmpMaskReg,
+ // causing unnecessary save/restore overhead. Consider constraining these to
+ // caller-saved register classes (e.g., GR8_AL, GR32_CallSaved) in the
+ // TableGen definitions to improve constant-time performance by eliminating
+ // prologue/epilogue instructions.
// Remove the original pseudo instruction
MI.eraseFromParent();
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 66c9d7505..3f98eca04 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -621,7 +621,7 @@ void X86PassConfig::addPreEmitPass2() {
(TT.isOSDarwin() &&
(M->getFunction("objc_retainAutoreleasedReturnValue") ||
M->getFunction("objc_unsafeClaimAutoreleasedReturnValue"))) ||
- F.hasFnAttribute("ct-select");
+ F.hasFnAttribute("ct-select");
}));
// Analyzes and emits pseudos to support Win x64 Unwind V2. This pass must run
``````````
</details>
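If the fixes look right, one way to take them locally is to save the diff above to a file at the repository root and apply it with `git apply` (a sketch; the patch filename is arbitrary):

``````````bash
# Save the contents of "View the diff from clang-format here" as clang-format.patch, then:
git apply clang-format.patch
``````````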
https://github.com/llvm/llvm-project/pull/166704
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits