Modified: trunk/Source/_javascript_Core/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp (294086 => 294087)
--- trunk/Source/_javascript_Core/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp 2022-05-12 01:44:45 UTC (rev 294086)
+++ trunk/Source/_javascript_Core/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp 2022-05-12 02:06:37 UTC (rev 294087)
@@ -78,7 +78,7 @@
{
m_liveRangeEnd = TmpMap<size_t>(m_code, 0);
- m_globalInstIndex = 0;
+ m_globalInstIndex = 1;
for (BasicBlock* block : m_code) {
for (Tmp tmp : liveness.liveAtHead(block)) {
if (!tmp.isReg())
@@ -180,11 +180,12 @@
{
ASSERT(reg);
ASSERT(m_map[tmp].reg == reg);
+ ASSERT(tmp.isReg() || m_liveRangeEnd[tmp] >= m_globalInstIndex);
flush(tmp, reg);
release(tmp, reg);
}
-ALWAYS_INLINE void GenerateAndAllocateRegisters::alloc(Tmp tmp, Reg reg, bool isDef)
+ALWAYS_INLINE void GenerateAndAllocateRegisters::alloc(Tmp tmp, Reg reg, Arg::Role role)
{
if (Tmp occupyingTmp = m_currentAllocation->at(reg))
spill(occupyingTmp, reg);
@@ -197,7 +198,7 @@
m_availableRegs[tmp.bank()].clear(reg);
m_currentAllocation->at(reg) = tmp;
- if (!isDef) {
+ if (Arg::isAnyUse(role)) {
intptr_t offset = m_map[tmp].spillSlot->offsetFromFP();
if (tmp.bank() == GP)
m_jit->load64(callFrameAddr(*m_jit, offset), reg.gpr());
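The switch from a bare `isDef` bool to the full `Arg::Role` pays off right here in alloc(): a register handed to a pure Def does not need its previous contents, but any role that reads the Tmp (Use, UseDef, LateUse, ...) must be refilled from the Tmp's spill slot before the instruction executes. A minimal sketch of that decision, using made-up stand-in types rather than the real Air `Tmp`/`Arg`/`CCallHelpers` classes:

    // Illustrative only: mirrors the "reload on any use, skip on pure def" rule.
    #include <cstdint>
    #include <cstdio>
    #include <map>

    enum class Role { Use, UseDef, Def, LateUse };
    static bool isAnyUse(Role r) { return r != Role::Def; }

    struct Machine {
        std::map<int, std::int64_t> spillSlots; // tmp index -> value stored on the stack
        std::map<int, std::int64_t> registers;  // reg index -> current value
    };

    // Bind tmp to reg; emit a load from the spill slot only when the role reads.
    void allocRegister(Machine& m, int tmp, int reg, Role role)
    {
        if (isAnyUse(role))
            m.registers[reg] = m.spillSlots[tmp]; // stands in for load64(callFrameAddr(...), reg)
        // For Role::Def the instruction will define the register, so no load is needed.
    }

    int main()
    {
        Machine m;
        m.spillSlots[0] = 42;
        allocRegister(m, 0, 7, Role::UseDef); // reads the old value: reload happens
        allocRegister(m, 1, 8, Role::Def);    // pure def: no load emitted
        std::printf("r7 = %lld\n", static_cast<long long>(m.registers[7]));
    }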
@@ -206,12 +207,29 @@
}
}
-ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsIfNeeded()
+// freeDeadTmpsAtCurrentInst and freeDeadTmpsAtCurrentBlock are needed for correctness because
+// we reuse stack slots between Tmps that don't interfere. So we need to make sure we don't
+// spill a dead Tmp into a stack slot that may now belong to a live Tmp.
+// freeDeadTmpsAtCurrentInst is meant to be called as we walk through each instruction in a basic block
+// because it doesn't consult the entire register file, and is faster than freeDeadTmpsAtCurrentBlock.
+// However, it only prunes things that die at a particular inst index within a block, and doesn't prevent
+// us from propagating a Tmp that is live in one block to the head of a block where it is dead. If
+// something dies within a block, freeDeadTmpsAtCurrentInst will catch it. freeDeadTmpsAtCurrentBlock is
+// meant to ensure we prune away any Tmps that are dead at the head of a block.
+ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsAtCurrentInst()
{
- if (m_didAlreadyFreeDeadSlots)
- return;
+ auto iter = m_tmpsToRelease.find(m_globalInstIndex);
+ if (iter != m_tmpsToRelease.end()) {
+ for (Tmp tmp : iter->value) {
+ ASSERT(m_liveRangeEnd[tmp] < m_globalInstIndex);
+ if (Reg reg = m_map[tmp].reg)
+ release(tmp, reg);
+ }
+ }
+}
- m_didAlreadyFreeDeadSlots = true;
+ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsAtCurrentBlock()
+{
for (size_t i = 0; i < m_currentAllocation->size(); ++i) {
Tmp tmp = m_currentAllocation->at(i);
if (!tmp)
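The replacement for freeDeadTmpsIfNeeded splits the work into the two routines described in the comment above. Roughly: per instruction, a precomputed schedule mapping an instruction index to the Tmps whose live range ended just before it lets the allocator drop registers without touching the rest of the register file; once per block head, a full sweep over the current allocation catches Tmps that were live at the tail of a predecessor but are dead on entry. A rough sketch of the two paths, with plain STL containers standing in for WTF's HashMap/IndexMap/TmpMap:

    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    struct Allocator {
        size_t globalInstIndex = 1;                        // 1-based, as in this patch
        std::vector<size_t> liveRangeEnd;                  // per-tmp index of its last use (0 = never live)
        std::unordered_map<size_t, std::vector<size_t>> tmpsToRelease; // inst index -> tmps dead by then
        std::vector<int> currentAllocation;                // reg -> tmp index, or -1 if free
        std::vector<int> tmpToReg;                         // tmp -> reg index, or -1 if spilled

        void release(size_t tmp, int reg)
        {
            currentAllocation[reg] = -1;                   // the real code also re-adds reg to m_availableRegs
            tmpToReg[tmp] = -1;
        }

        // Fast path, called while walking a block: only the tmps recorded for this index.
        void freeDeadTmpsAtCurrentInst()
        {
            auto it = tmpsToRelease.find(globalInstIndex);
            if (it == tmpsToRelease.end())
                return;
            for (size_t tmp : it->second) {
                if (int reg = tmpToReg[tmp]; reg >= 0)
                    release(tmp, reg);
            }
        }

        // Slow path, run once at each block head: sweep every register.
        void freeDeadTmpsAtCurrentBlock()
        {
            for (size_t reg = 0; reg < currentAllocation.size(); ++reg) {
                int tmp = currentAllocation[reg];
                if (tmp >= 0 && liveRangeEnd[tmp] < globalInstIndex)
                    release(static_cast<size_t>(tmp), static_cast<int>(reg));
            }
        }
    };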
@@ -225,44 +243,73 @@
}
}
-ALWAYS_INLINE bool GenerateAndAllocateRegisters::assignTmp(Tmp& tmp, Bank bank, bool isDef)
+ALWAYS_INLINE bool GenerateAndAllocateRegisters::assignTmp(Tmp& tmp, Bank bank, Arg::Role role)
{
ASSERT(!tmp.isReg());
+
+ auto markRegisterAsUsed = [&] (Reg reg) {
+ if (Arg::isAnyDef(role))
+ m_clobberedToClear.clear(reg);
+ // At this point, it doesn't matter if we add it to the m_namedUsedRegs or m_namedDefdRegs.
+ // We just need to mark that we can't use it again for another tmp.
+ m_namedUsedRegs.set(reg);
+ };
+
+ bool mightInterfere = m_earlyClobber.numberOfSetRegisters() || m_lateClobber.numberOfSetRegisters();
+
+ auto interferesWithClobber = [&] (Reg reg) {
+ if (!mightInterfere)
+ return false;
+ if (Arg::isAnyUse(role) && m_earlyClobber.get(reg))
+ return true;
+ if (Arg::isAnyDef(role) && m_lateClobber.get(reg))
+ return true;
+ if (Arg::activeAt(role, Arg::Phase::Early) && m_earlyClobber.get(reg))
+ return true;
+ if (Arg::activeAt(role, Arg::Phase::Late) && m_lateClobber.get(reg))
+ return true;
+ return false;
+ };
+
if (Reg reg = m_map[tmp].reg) {
- ASSERT(!m_namedDefdRegs.contains(reg));
- tmp = Tmp(reg);
- m_namedUsedRegs.set(reg);
- ASSERT(!m_availableRegs[bank].get(reg));
- return true;
+ if (!interferesWithClobber(reg)) {
+ ASSERT(!m_namedDefdRegs.contains(reg));
+ tmp = Tmp(reg);
+ markRegisterAsUsed(reg);
+ ASSERT(!m_availableRegs[bank].get(reg));
+ return true;
+ }
+ // This is a rare case where we've already allocated a Tmp in some way, but another
+ // Role of the same Tmp imposes a restriction on which register can be used. E.g., if
+ // we have a program like:
+ // Patch Use:tmp1, LateUse:tmp1, lateClobber:x0
+ // The first use of tmp1 can be allocated to x0, but the second cannot.
+ spill(tmp, reg);
+
}
- if (!m_availableRegs[bank].numberOfSetRegisters())
- freeDeadTmpsIfNeeded();
-
if (m_availableRegs[bank].numberOfSetRegisters()) {
// We first take an available register.
for (Reg reg : m_registers[bank]) {
- if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
+ if (interferesWithClobber(reg) || m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
continue;
if (!m_availableRegs[bank].contains(reg))
continue;
- m_namedUsedRegs.set(reg); // At this point, it doesn't matter if we add it to the m_namedUsedRegs or m_namedDefdRegs. We just need to mark that we can't use it again.
- alloc(tmp, reg, isDef);
+
+ markRegisterAsUsed(reg);
+ alloc(tmp, reg, role);
tmp = Tmp(reg);
return true;
}
-
- RELEASE_ASSERT_NOT_REACHED();
}
// Nothing was available, let's make some room.
for (Reg reg : m_registers[bank]) {
- if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
+ if (interferesWithClobber(reg) || m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
continue;
- m_namedUsedRegs.set(reg);
-
- alloc(tmp, reg, isDef);
+ markRegisterAsUsed(reg);
+ alloc(tmp, reg, role);
tmp = Tmp(reg);
return true;
}
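The new interferesWithClobber predicate is what lets a patchpoint's early and late clobber sets constrain allocation per role instead of being folded into the def set as before: a register in the early-clobber set cannot hold a value the instruction still needs to read, and a register in the late-clobber set cannot hold a value the instruction produces or uses late. A condensed sketch of the same predicate over simplified role/phase helpers (the real code works on Air's `Arg::Role`, `Arg::activeAt`, and `RegisterSet`; the role-to-phase mapping below is an approximation):

    #include <bitset>

    enum class Role { Use, UseDef, Def, LateUse };
    enum class Phase { Early, Late };

    static bool isAnyUse(Role r) { return r != Role::Def; }
    static bool isAnyDef(Role r) { return r == Role::Def || r == Role::UseDef; }

    static bool activeAt(Role r, Phase p)
    {
        switch (r) {
        case Role::Use:     return p == Phase::Early;
        case Role::LateUse: return p == Phase::Late;
        case Role::Def:     return p == Phase::Late;
        case Role::UseDef:  return true; // read early, written late
        }
        return false;
    }

    using RegisterSet = std::bitset<32>;

    // A register is off-limits for this role if the Tmp's value would have to
    // survive a phase in which the patchpoint clobbers that register.
    bool interferesWithClobber(int reg, Role role, const RegisterSet& earlyClobber, const RegisterSet& lateClobber)
    {
        if (isAnyUse(role) && earlyClobber.test(reg))
            return true;
        if (isAnyDef(role) && lateClobber.test(reg))
            return true;
        if (activeAt(role, Phase::Early) && earlyClobber.test(reg))
            return true;
        if (activeAt(role, Phase::Late) && lateClobber.test(reg))
            return true;
        return false;
    }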
@@ -298,7 +345,7 @@
Vector<StackSlot*, 16> freeSlots;
Vector<StackSlot*, 4> toFree;
- m_globalInstIndex = 0;
+ m_globalInstIndex = 1;
for (BasicBlock* block : m_code) {
auto assignStackSlotToTmp = [&] (Tmp tmp) {
if (tmp.isReg())
@@ -424,6 +471,12 @@
buildLiveRanges(*m_liveness);
+ m_code.forEachTmp([&] (Tmp tmp) {
+ ASSERT(!tmp.isReg());
+ if (size_t liveRangeEnd = m_liveRangeEnd[tmp])
+ m_tmpsToRelease.add(liveRangeEnd + 1, Vector<Tmp, 2>()).iterator->value.append(tmp);
+ });
+
IndexMap<BasicBlock*, IndexMap<Reg, Tmp>> currentAllocationMap(m_code.size());
{
IndexMap<Reg, Tmp> defaultCurrentAllocation(Reg::maxIndex() + 1);
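This hunk builds the release schedule that freeDeadTmpsAtCurrentInst consumes: every Tmp with a recorded live-range end is queued under `liveRangeEnd + 1`, the first instruction index at which it is safe to drop. This also appears to be why `m_globalInstIndex` now starts at 1 in the hunks above and below: with a 1-based counter, a `m_liveRangeEnd` value of 0 (the TmpMap default) unambiguously means "never live", and the assertion `m_liveRangeEnd[tmp] < m_globalInstIndex` holds for everything pulled out of the map. A toy illustration of the schedule being built and then drained, with plain STL containers standing in for WTF's ones:

    #include <cstddef>
    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    int main()
    {
        // Hypothetical live-range ends for three tmps; 0 means the tmp was never live.
        std::vector<size_t> liveRangeEnd = { 3, 0, 5 };

        // Queue each live tmp to be released at the first index past its last use.
        std::unordered_map<size_t, std::vector<size_t>> tmpsToRelease;
        for (size_t tmp = 0; tmp < liveRangeEnd.size(); ++tmp) {
            if (size_t end = liveRangeEnd[tmp])
                tmpsToRelease[end + 1].push_back(tmp);
        }

        // Walk instructions with a 1-based counter, draining the schedule as we go.
        for (size_t instIndex = 1; instIndex <= 7; ++instIndex) {
            auto it = tmpsToRelease.find(instIndex);
            if (it == tmpsToRelease.end())
                continue;
            for (size_t tmp : it->second)
                std::printf("inst %zu: tmp %zu is dead, its register can be released\n", instIndex, tmp);
        }
    }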
@@ -461,7 +514,7 @@
Disassembler* disassembler = m_code.disassembler();
- m_globalInstIndex = 0;
+ m_globalInstIndex = 1;
for (BasicBlock* block : m_code) {
context.currentBlock = block;
@@ -525,6 +578,8 @@
++m_globalInstIndex;
+ freeDeadTmpsAtCurrentBlock();
+
bool isReplayingSameInst = false;
for (size_t instIndex = 0; instIndex < block->size(); ++instIndex) {
checkConsistency();
@@ -536,10 +591,11 @@
Inst& inst = block->at(instIndex);
- m_didAlreadyFreeDeadSlots = false;
-
m_namedUsedRegs = RegisterSet();
m_namedDefdRegs = RegisterSet();
+ m_earlyClobber = RegisterSet();
+ m_lateClobber = RegisterSet();
+ m_clobberedToClear = RegisterSet();
bool needsToGenerate = ([&] () -> bool {
// FIXME: We should consider trying to figure out if we can also elide Mov32s
@@ -614,25 +670,19 @@
}
});
- RegisterSet clobberedRegisters;
- RegisterSet earlyNextClobberedRegisters;
- {
- Inst* nextInst = block->get(instIndex + 1);
- if (inst.kind.opcode == Patch || (nextInst && nextInst->kind.opcode == Patch)) {
- if (inst.kind.opcode == Patch)
- clobberedRegisters.merge(inst.extraClobberedRegs());
- if (nextInst && nextInst->kind.opcode == Patch)
- earlyNextClobberedRegisters.merge(nextInst->extraEarlyClobberedRegs());
+ if (inst.kind.opcode == Patch) {
+ m_earlyClobber.merge(inst.extraEarlyClobberedRegs());
+ m_lateClobber.merge(inst.extraClobberedRegs());
- clobberedRegisters.filter(m_allowedRegisters);
- clobberedRegisters.exclude(m_namedDefdRegs);
- earlyNextClobberedRegisters.filter(m_allowedRegisters);
+ m_earlyClobber.filter(m_allowedRegisters);
+ m_lateClobber.filter(m_allowedRegisters);
- m_namedDefdRegs.merge(clobberedRegisters);
- }
+ m_clobberedToClear.merge(m_earlyClobber);
+ m_clobberedToClear.merge(m_lateClobber);
+ m_clobberedToClear.exclude(m_namedDefdRegs);
}
- auto allocNamed = [&] (const RegisterSet& named, bool isDef) {
+ auto allocNamed = [&] (const RegisterSet& named, Arg::Role role) {
for (Reg reg : named) {
if (Tmp occupyingTmp = currentAllocation[reg]) {
if (occupyingTmp == Tmp(reg))
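The Patch-handling block above replaces the old scheme, which merged a Patch's clobbers into m_namedDefdRegs and peeked at the *next* instruction's early clobbers. The current instruction's early and late clobber sets are now tracked separately, and m_clobberedToClear is derived from them: the registers whose contents must be discarded after generation, i.e. everything clobbered except registers that are named defs of the instruction itself (a def is written after the clobber, so its value stays valid); assignTmp's markRegisterAsUsed likewise removes a register from the set when it hands it to a def Tmp. A small set-algebra sketch of that derivation, assuming a plain bitset in place of RegisterSet:

    #include <bitset>
    #include <cstdio>
    #include <string>

    using RegisterSet = std::bitset<32>;

    int main()
    {
        RegisterSet allowedRegisters;
        allowedRegisters.set();               // pretend the allocator may use all 32 registers

        RegisterSet earlyClobber, lateClobber, namedDefdRegs;
        earlyClobber.set(0);                  // e.g. a scratch the patch tramples early
        lateClobber.set(1);                   // e.g. volatile registers of a call
        lateClobber.set(2);
        namedDefdRegs.set(2);                 // reg 2 is also a named def: the patch writes it last

        earlyClobber &= allowedRegisters;     // filter(m_allowedRegisters)
        lateClobber &= allowedRegisters;

        // merge(early); merge(late); exclude(namedDefdRegs)
        RegisterSet clobberedToClear = (earlyClobber | lateClobber) & ~namedDefdRegs;

        std::printf("clobbered-to-clear: %s\n", clobberedToClear.to_string().c_str());
    }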
@@ -639,18 +689,29 @@
continue;
}
- freeDeadTmpsIfNeeded(); // We don't want to spill a dead tmp.
- alloc(Tmp(reg), reg, isDef);
+ alloc(Tmp(reg), reg, role);
}
};
- allocNamed(m_namedUsedRegs, false); // Must come before the defd registers since we may use and def the same register.
- allocNamed(m_namedDefdRegs, true);
+ auto spillIfNeeded = [&] (const RegisterSet& set) {
+ for (Reg reg : set) {
+ if (Tmp tmp = m_currentAllocation->at(reg))
+ spill(tmp, reg);
+ }
+ };
+ freeDeadTmpsAtCurrentInst();
+
+ spillIfNeeded(m_earlyClobber);
+ spillIfNeeded(m_lateClobber);
+
+ allocNamed(m_namedUsedRegs, Arg::Role::Use); // Must come before the defd registers since we may use and def the same register.
+ allocNamed(m_namedDefdRegs, Arg::Role::Def);
+
if (needsToGenerate) {
auto tryAllocate = [&] {
- Vector<Tmp*, 8> usesToAlloc;
- Vector<Tmp*, 8> defsToAlloc;
+ Vector<std::pair<Tmp*, Arg::Role>, 8> usesToAlloc;
+ Vector<std::pair<Tmp*, Arg::Role>, 8> defsToAlloc;
inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
if (tmp.isReg())
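The per-instruction sequence set up in this hunk is: drain the dead-Tmp schedule first (so nothing already dead gets spilled into a stack slot that may have been reused, per the comment near freeDeadTmpsAtCurrentInst), then evacuate any Tmp still occupying a register the patchpoint will clobber, and only then pin the named use registers followed by the named def registers. A sketch of the evacuation step, again over simplified stand-in types:

    #include <bitset>
    #include <vector>

    using RegisterSet = std::bitset<32>;

    struct Allocator {
        std::vector<int> currentAllocation = std::vector<int>(32, -1); // reg -> tmp index, or -1

        void spill(int /*tmp*/, int reg)
        {
            // The real code stores the register back to the Tmp's stack slot,
            // then releases the register; the sketch only drops the binding.
            currentAllocation[reg] = -1;
        }

        // Evacuate every Tmp currently living in a register the patchpoint is
        // about to clobber, so the clobber cannot destroy a live value.
        void spillIfNeeded(const RegisterSet& clobbered)
        {
            for (int reg = 0; reg < static_cast<int>(clobbered.size()); ++reg) {
                if (!clobbered.test(reg))
                    continue;
                if (int tmp = currentAllocation[reg]; tmp >= 0)
                    spill(tmp, reg);
            }
        }
    };

    int main()
    {
        Allocator a;
        a.currentAllocation[3] = 7;            // tmp 7 currently lives in reg 3
        RegisterSet lateClobber;
        lateClobber.set(3);                    // the patch clobbers reg 3 late
        a.spillIfNeeded(lateClobber);          // tmp 7's value is saved to its slot, reg 3 freed
    }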
@@ -658,15 +719,18 @@
// We treat Use+Def as a use.
if (Arg::isAnyUse(role))
- usesToAlloc.append(&tmp);
+ usesToAlloc.append(std::pair { &tmp, role });
else if (Arg::isAnyDef(role))
- defsToAlloc.append(&tmp);
+ defsToAlloc.append(std::pair { &tmp, role });
});
- auto tryAllocateTmps = [&] (auto& vector, bool isDef) {
+ auto tryAllocateTmps = [&] (auto& vector) {
bool success = true;
- for (Tmp* tmp : vector)
- success &= assignTmp(*tmp, tmp->bank(), isDef);
+ for (std::pair<Tmp*, Arg::Role> pair : vector) {
+ Tmp& tmp = *std::get<0>(pair);
+ Arg::Role role = std::get<1>(pair);
+ success &= assignTmp(tmp, tmp.bank(), role);
+ }
return success;
};
@@ -676,8 +740,8 @@
// some tmps may both be used and defd. So we handle uses first since forEachTmp could
// walk uses/defs in any order.
bool success = true;
- success &= tryAllocateTmps(usesToAlloc, false);
- success &= tryAllocateTmps(defsToAlloc, true);
+ success &= tryAllocateTmps(usesToAlloc);
+ success &= tryAllocateTmps(defsToAlloc);
return success;
};
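Because forEachTmp may visit an instruction's operands in any order, the collected (Tmp*, Arg::Role) pairs are still processed uses-first: when the same Tmp is both a use and a def of one instruction, the use has to claim a register before the def pass runs, and the def then finds the Tmp already mapped and reuses that register. A stripped-down version of that two-pass walk, with toy types in place of Air's:

    #include <cstdio>
    #include <utility>
    #include <vector>

    enum class Role { Use, Def };

    struct Tmp { int index; int reg = -1; };

    // Hand out a (fake) register; a tmp that already has one keeps it.
    static bool assignTmp(Tmp& tmp, Role, int& nextFreeReg)
    {
        if (tmp.reg < 0)
            tmp.reg = nextFreeReg++;
        return true;
    }

    int main()
    {
        Tmp t { 0 };
        std::vector<std::pair<Tmp*, Role>> usesToAlloc { { &t, Role::Use } };
        std::vector<std::pair<Tmp*, Role>> defsToAlloc { { &t, Role::Def } };

        int nextFreeReg = 0;
        bool success = true;
        for (auto& [tmp, role] : usesToAlloc) // uses first, as in the real code
            success &= assignTmp(*tmp, role, nextFreeReg);
        for (auto& [tmp, role] : defsToAlloc)
            success &= assignTmp(*tmp, role, nextFreeReg);
        std::printf("tmp0 -> r%d, success = %d\n", t.reg, success);
    }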
@@ -729,7 +793,6 @@
}
if (m_code.needsUsedRegisters() && inst.kind.opcode == Patch) {
- freeDeadTmpsIfNeeded();
RegisterSet registerSet;
for (size_t i = 0; i < currentAllocation.size(); ++i) {
if (currentAllocation[i])
@@ -738,13 +801,10 @@
inst.reportUsedRegisters(registerSet);
}
- auto clobber = [&] (const RegisterSet& set) {
- for (Reg reg : set) {
- Tmp tmp(reg);
- ASSERT(currentAllocation[reg] == tmp);
- m_availableRegs[tmp.bank()].set(reg);
- m_currentAllocation->at(reg) = Tmp();
- m_map[tmp].reg = Reg();
+ auto handleClobber = [&] {
+ for (Reg reg : m_clobberedToClear) {
+ if (Tmp tmp = m_currentAllocation->at(reg))
+ release(tmp, reg);
}
};
@@ -754,14 +814,11 @@
jump = inst.generate(*m_jit, context);
ASSERT_UNUSED(jump, !jump.isSet());
- allocNamed(earlyNextClobberedRegisters, true);
- clobberedRegisters.merge(earlyNextClobberedRegisters);
- clobber(clobberedRegisters);
+ handleClobber();
} else {
ASSERT(needsToGenerate);
- clobber(clobberedRegisters);
- ASSERT(earlyNextClobberedRegisters.isEmpty());
+ handleClobber();
if (block->numSuccessors()) {
// By default, we spill everything between block boundaries. However, we have a small
Modified: trunk/Source/_javascript_Core/b3/air/AirAllocateRegistersAndStackAndGenerateCode.h (294086 => 294087)
--- trunk/Source/_javascript_Core/b3/air/AirAllocateRegistersAndStackAndGenerateCode.h 2022-05-12 01:44:45 UTC (rev 294086)
+++ trunk/Source/_javascript_Core/b3/air/AirAllocateRegistersAndStackAndGenerateCode.h 2022-05-12 02:06:37 UTC (rev 294087)
@@ -59,9 +59,10 @@
void release(Tmp, Reg);
void flush(Tmp, Reg);
void spill(Tmp, Reg);
- void alloc(Tmp, Reg, bool isDef);
- void freeDeadTmpsIfNeeded();
- bool assignTmp(Tmp&, Bank, bool isDef);
+ void alloc(Tmp, Reg, Arg::Role);
+ void freeDeadTmpsAtCurrentInst();
+ void freeDeadTmpsAtCurrentBlock();
+ bool assignTmp(Tmp&, Bank, Arg::Role);
void buildLiveRanges(UnifiedTmpLiveness&);
bool isDisallowedRegister(Reg);
@@ -80,10 +81,13 @@
RegisterSet m_availableRegs[numBanks];
size_t m_globalInstIndex;
IndexMap<Reg, Tmp>* m_currentAllocation { nullptr };
- bool m_didAlreadyFreeDeadSlots;
TmpMap<size_t> m_liveRangeEnd;
+ HashMap<size_t, Vector<Tmp, 2>> m_tmpsToRelease;
RegisterSet m_namedUsedRegs;
RegisterSet m_namedDefdRegs;
+ RegisterSet m_earlyClobber;
+ RegisterSet m_lateClobber;
+ RegisterSet m_clobberedToClear;
RegisterSet m_allowedRegisters;
std::unique_ptr<UnifiedTmpLiveness> m_liveness;
Modified: trunk/Source/_javascript_Core/b3/air/testair.cpp (294086 => 294087)
--- trunk/Source/_javascript_Core/b3/air/testair.cpp 2022-05-12 01:44:45 UTC (rev 294086)
+++ trunk/Source/_javascript_Core/b3/air/testair.cpp 2022-05-12 02:06:37 UTC (rev 294087)
@@ -2392,6 +2392,124 @@
CHECK(result == static_cast<int32_t>(numberOfSlots));
}
+void testEarlyAndLateUseOfSameTmp()
+{
+ WeakRandom weakRandom;
+ size_t numTmps = RegisterSet::allGPRs().numberOfSetRegisters();
+ int64_t expectedResult = 0;
+ for (size_t i = 0; i < numTmps; ++i)
+ expectedResult += i;
+
+ for (unsigned j = 0; j < 60; ++j) {
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ B3::Air::Special* patchpointSpecial = code.addSpecial(makeUnique<B3::PatchpointSpecial>());
+
+ BasicBlock* root = code.addBlock();
+ Vector<Tmp> tmps;
+ Tmp result = code.newTmp(B3::GP);
+ root->append(Move, nullptr, Arg::imm(0), result);
+ for (size_t i = 0; i < numTmps; ++i) {
+ Tmp tmp = code.newTmp(B3::GP);
+ tmps.append(tmp);
+ root->append(Move, nullptr, Arg::imm(i), tmp);
+ }
+
+ {
+ unsigned rand = weakRandom.getUint32(tmps.size());
+ B3::Value* dummyValue = proc.addConstant(B3::Origin(), B3::Int64, 0);
+
+ B3::PatchpointValue* patchpoint = proc.add<B3::PatchpointValue>(B3::Void, B3::Origin());
+ patchpoint->append(dummyValue, B3::ValueRep::SomeRegister);
+ patchpoint->append(dummyValue, B3::ValueRep::SomeLateRegister);
+ patchpoint->clobberLate(RegisterSet::volatileRegistersForJSCall());
+ patchpoint->setGenerator([=] (CCallHelpers& jit, const B3::StackmapGenerationParams& params) {
+ RELEASE_ASSERT(!RegisterSet::volatileRegistersForJSCall().get(params[1].gpr()));
+
+ auto good = jit.branch64(CCallHelpers::Equal, params[1].gpr(), CCallHelpers::TrustedImm32(rand));
+ jit.breakpoint();
+ good.link(&jit);
+
+ auto good2 = jit.branch64(CCallHelpers::Equal, params[0].gpr(), CCallHelpers::TrustedImm32(rand));
+ jit.breakpoint();
+ good2.link(&jit);
+ });
+
+ Inst inst(Patch, patchpoint, Arg::special(patchpointSpecial));
+
+ Tmp tmp = tmps[rand];
+ inst.args.append(tmp);
+ inst.args.append(tmp);
+ root->append(inst);
+ }
+
+ for (Tmp tmp : tmps)
+ root->append(Add64, nullptr, tmp, result);
+ root->append(Move, nullptr, result, Tmp(GPRInfo::returnValueGPR));
+ root->append(Ret64, nullptr, Tmp(GPRInfo::returnValueGPR));
+
+ int64_t actualResult = compileAndRun<int64_t>(proc);
+ CHECK(actualResult == expectedResult);
+ }
+}
+
+void testEarlyClobberInterference()
+{
+ WeakRandom weakRandom;
+ size_t numTmps = RegisterSet::allGPRs().numberOfSetRegisters();
+ int64_t expectedResult = 0;
+ for (size_t i = 0; i < numTmps; ++i)
+ expectedResult += i;
+
+ for (unsigned j = 0; j < 100; ++j) {
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ B3::Air::Special* patchpointSpecial = code.addSpecial(makeUnique<B3::PatchpointSpecial>());
+
+ BasicBlock* root = code.addBlock();
+ Vector<Tmp> tmps;
+ Tmp result = code.newTmp(B3::GP);
+ root->append(Move, nullptr, Arg::imm(0), result);
+ for (size_t i = 0; i < numTmps; ++i) {
+ Tmp tmp = code.newTmp(B3::GP);
+ tmps.append(tmp);
+ root->append(Move, nullptr, Arg::imm(i), tmp);
+ }
+
+ {
+ unsigned rand = weakRandom.getUint32(tmps.size());
+ B3::Value* dummyValue = proc.addConstant(B3::Origin(), B3::Int64, 0);
+
+ B3::PatchpointValue* patchpoint = proc.add<B3::PatchpointValue>(B3::Void, B3::Origin());
+ patchpoint->append(dummyValue, B3::ValueRep::SomeRegister);
+ patchpoint->clobberEarly(RegisterSet::volatileRegistersForJSCall());
+ patchpoint->setGenerator([=] (CCallHelpers& jit, const B3::StackmapGenerationParams& params) {
+ RELEASE_ASSERT(!RegisterSet::volatileRegistersForJSCall().get(params[0].gpr()));
+
+ auto good = jit.branch64(CCallHelpers::Equal, params[0].gpr(), CCallHelpers::TrustedImm32(rand));
+ jit.breakpoint();
+ good.link(&jit);
+ });
+
+ Inst inst(Patch, patchpoint, Arg::special(patchpointSpecial));
+
+ Tmp tmp = tmps[rand];
+ inst.args.append(tmp);
+ root->append(inst);
+ }
+
+ for (Tmp tmp : tmps)
+ root->append(Add64, nullptr, tmp, result);
+ root->append(Move, nullptr, result, Tmp(GPRInfo::returnValueGPR));
+ root->append(Ret64, nullptr, Tmp(GPRInfo::returnValueGPR));
+
+ int64_t actualResult = compileAndRun<int64_t>(proc);
+ CHECK(actualResult == expectedResult);
+ }
+}
+
#define PREFIX "O", Options::defaultB3OptLevel(), ": "
#define RUN(test) do { \
@@ -2489,6 +2607,9 @@
RUN(testZDefOfSpillSlotWithOffsetNeedingToBeMaterializedInARegister());
+ RUN(testEarlyAndLateUseOfSameTmp());
+ RUN(testEarlyClobberInterference());
+
if (tasks.isEmpty())
usage();