Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (97196 => 97197)
--- trunk/Source/_javascript_Core/ChangeLog 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/ChangeLog 2011-10-11 23:50:26 UTC (rev 97197)
@@ -1,3 +1,58 @@
+2011-10-11 Filip Pizlo <fpi...@apple.com>
+
+ DFG virtual register allocator should be more aggressive in
+ reusing temporary slots
+ https://bugs.webkit.org/show_bug.cgi?id=69868
+
+ Reviewed by Oliver Hunt.
+
+ 1.2% win on V8, neutral elsewhere. The win is probably because it
+ increases precision of GC conservative scans.
+
+ This required making the DFG::ScoreBoard operate over a bitvector
+ of preserved variables, rather than just a preserved variable
+ threshold. To do this, I improved the WTF::BitVector class to make
+ it more user-friendly. It still retains all previous functionality.
+ Also made changes to PackedIntVector to accomodate those changes.
+ Finally, this adds more debugging to the virtual register allocator
+ and to the OSR exit code, as this was necessary to track down bugs
+ in an earlier version of this patch.
+
+ * dfg/DFGByteCodeParser.cpp:
+ (JSC::DFG::ByteCodeParser::ByteCodeParser):
+ (JSC::DFG::ByteCodeParser::getLocal):
+ * dfg/DFGGraph.h:
+ * dfg/DFGJITCompiler.cpp:
+ (JSC::DFG::JITCompiler::exitSpeculativeWithOSR):
+ * dfg/DFGPropagator.cpp:
+ (JSC::DFG::Propagator::allocateVirtualRegisters):
+ * dfg/DFGScoreBoard.h:
+ (JSC::DFG::ScoreBoard::ScoreBoard):
+ (JSC::DFG::ScoreBoard::~ScoreBoard):
+ (JSC::DFG::ScoreBoard::allocate):
+ (JSC::DFG::ScoreBoard::use):
+ (JSC::DFG::ScoreBoard::highWatermark):
+ (JSC::DFG::ScoreBoard::dump):
+ (JSC::DFG::ScoreBoard::max):
+ * dfg/DFGSpeculativeJIT.cpp:
+ (JSC::DFG::ValueRecovery::dump):
+ * wtf/BitVector.cpp:
+ (WTF::BitVector::setSlow):
+ (WTF::BitVector::resizeOutOfLine):
+ (WTF::BitVector::dump):
+ * wtf/BitVector.h:
+ (WTF::BitVector::BitVector):
+ (WTF::BitVector::operator=):
+ (WTF::BitVector::quickGet):
+ (WTF::BitVector::quickSet):
+ (WTF::BitVector::quickClear):
+ (WTF::BitVector::get):
+ (WTF::BitVector::set):
+ (WTF::BitVector::clear):
+ * wtf/PackedIntVector.h:
+ (WTF::PackedIntVector::get):
+ (WTF::PackedIntVector::set):
+
2011-10-11 Gavin Barraclough <baraclo...@apple.com>
DFG JIT 32_64 - Switch to cdecl calling convention.
Modified: trunk/Source/_javascript_Core/dfg/DFGByteCodeParser.cpp (97196 => 97197)
--- trunk/Source/_javascript_Core/dfg/DFGByteCodeParser.cpp 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/dfg/DFGByteCodeParser.cpp 2011-10-11 23:50:26 UTC (rev 97197)
@@ -60,6 +60,9 @@
, m_globalResolveNumber(0)
{
ASSERT(m_profiledBlock);
+
+ for (int i = 0; i < codeBlock->m_numVars; ++i)
+ m_preservedVars.set(i);
}
// Parse a full CodeBlock of bytecode.
@@ -137,7 +140,7 @@
// Check for reads of temporaries from prior blocks,
// expand m_preservedVars to cover these.
- m_preservedVars = std::max(m_preservedVars, operand + 1);
+ m_preservedVars.set(operand);
VariableAccessData* variableAccessData = newVariableAccessData(operand);
@@ -617,10 +620,10 @@
unsigned m_numArguments;
// The number of locals (vars + temporaries) used in the function.
unsigned m_numLocals;
- // The number of registers we need to preserve across BasicBlock boundaries;
- // typically equal to the number vars, but we expand this to cover all
+ // The set of registers we need to preserve across BasicBlock boundaries;
+ // typically equal to the set of vars, but we expand this to cover all
// temporaries that persist across blocks (dues to ?:, &&, ||, etc).
- unsigned m_preservedVars;
+ BitVector m_preservedVars;
// The number of slots (in units of sizeof(Register)) that we need to
// preallocate for calls emanating from this frame. This includes the
// size of the CallFrame, only if this is not a leaf function. (I.e.
Modified: trunk/Source/_javascript_Core/dfg/DFGGraph.h (97196 => 97197)
--- trunk/Source/_javascript_Core/dfg/DFGGraph.h 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/dfg/DFGGraph.h 2011-10-11 23:50:26 UTC (rev 97197)
@@ -32,6 +32,7 @@
#include "DFGNode.h"
#include "PredictionTracker.h"
#include "RegisterFile.h"
+#include <wtf/BitVector.h>
#include <wtf/HashMap.h>
#include <wtf/Vector.h>
#include <wtf/StdLibExtras.h>
@@ -291,7 +292,7 @@
SegmentedVector<VariableAccessData, 16> m_variableAccessData;
SegmentedVector<StructureSet, 16> m_structureSet;
SegmentedVector<StructureTransitionData, 8> m_structureTransitionData;
- unsigned m_preservedVars;
+ BitVector m_preservedVars;
unsigned m_localVars;
unsigned m_parameterSlots;
private:
Modified: trunk/Source/_javascript_Core/dfg/DFGJITCompiler.cpp (97196 => 97197)
--- trunk/Source/_javascript_Core/dfg/DFGJITCompiler.cpp 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/dfg/DFGJITCompiler.cpp 2011-10-11 23:50:26 UTC (rev 97197)
@@ -45,7 +45,7 @@
exit.m_check.link(this);
#if ENABLE(DFG_DEBUG_VERBOSE)
- fprintf(stderr, "OSR exit for Node @%d (bc#%u) at JIT offset 0x%x ", (int)exit.m_nodeIndex, exit.m_bytecodeIndex, debugOffset());
+ fprintf(stderr, "OSR exit for Node @%d (bc#%u) at JIT offset 0x%x ", (int)exit.m_nodeIndex, exit.m_bytecodeIndex, debugOffset());
exit.dump(stderr);
#endif
#if ENABLE(DFG_VERBOSE_SPECULATION_FAILURE)
@@ -161,6 +161,23 @@
}
}
+#if ENABLE(DFG_DEBUG_VERBOSE)
+ fprintf(stderr, " ");
+ if (numberOfPoisonedVirtualRegisters)
+ fprintf(stderr, "Poisoned=%u ", numberOfPoisonedVirtualRegisters);
+ if (numberOfDisplacedVirtualRegisters)
+ fprintf(stderr, "Displaced=%u ", numberOfDisplacedVirtualRegisters);
+ if (haveUnboxedInt32s)
+ fprintf(stderr, "UnboxedInt32 ");
+ if (haveFPRs)
+ fprintf(stderr, "FPR ");
+ if (haveConstants)
+ fprintf(stderr, "Constants ");
+ if (haveUndefined)
+ fprintf(stderr, "Undefined ");
+ fprintf(stderr, " ");
+#endif
+
EncodedJSValue* scratchBuffer = static_cast<EncodedJSValue*>(globalData()->scratchBufferForSize(sizeof(EncodedJSValue) * (numberOfPoisonedVirtualRegisters + (numberOfDisplacedVirtualRegisters <= GPRInfo::numberOfRegisters ? 0 : numberOfDisplacedVirtualRegisters))));
// From here on, the code assumes that it is profitable to maximize the distance
@@ -485,7 +502,7 @@
jump(GPRInfo::regT1);
#if ENABLE(DFG_DEBUG_VERBOSE)
- fprintf(stderr, " -> %p\n", jumpTarget);
+ fprintf(stderr, "-> %p\n", jumpTarget);
#endif
}
Modified: trunk/Source/_javascript_Core/dfg/DFGPropagator.cpp (97196 => 97197)
--- trunk/Source/_javascript_Core/dfg/DFGPropagator.cpp 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/dfg/DFGPropagator.cpp 2011-10-11 23:50:26 UTC (rev 97197)
@@ -1402,6 +1402,11 @@
void allocateVirtualRegisters()
{
+#if ENABLE(DFG_DEBUG_VERBOSE)
+ printf("Preserved vars: ");
+ m_graph.m_preservedVars.dump(stdout);
+ printf("\n");
+#endif
ScoreBoard scoreBoard(m_graph, m_graph.m_preservedVars);
unsigned sizeExcludingPhiNodes = m_graph.m_blocks.last()->end;
for (size_t i = 0; i < sizeExcludingPhiNodes; ++i) {
@@ -1440,9 +1445,12 @@
// 'm_numCalleeRegisters' is the number of locals and temporaries allocated
// for the function (and checked for on entry). Since we perform a new and
// different allocation of temporaries, more registers may now be required.
- unsigned calleeRegisters = scoreBoard.allocatedCount() + m_graph.m_preservedVars + m_graph.m_parameterSlots;
+ unsigned calleeRegisters = scoreBoard.highWatermark() + m_graph.m_parameterSlots;
if ((unsigned)m_codeBlock->m_numCalleeRegisters < calleeRegisters)
m_codeBlock->m_numCalleeRegisters = calleeRegisters;
+#if ENABLE(DFG_DEBUG_VERBOSE)
+ printf("Num callee registers: %u\n", calleeRegisters);
+#endif
}
Graph& m_graph;
Modified: trunk/Source/_javascript_Core/dfg/DFGScoreBoard.h (97196 => 97197)
--- trunk/Source/_javascript_Core/dfg/DFGScoreBoard.h 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/dfg/DFGScoreBoard.h 2011-10-11 23:50:26 UTC (rev 97197)
@@ -29,6 +29,7 @@
#if ENABLE(DFG_JIT)
#include <dfg/DFGGraph.h>
+#include <wtf/BitVector.h>
#include <wtf/Vector.h>
namespace JSC { namespace DFG {
@@ -42,23 +43,27 @@
// another node.
class ScoreBoard {
public:
- ScoreBoard(Graph& graph, uint32_t firstTemporary)
+ ScoreBoard(Graph& graph, const BitVector& usedVars)
: m_graph(graph)
- , m_firstTemporary(firstTemporary)
+ , m_highWatermark(0)
{
+ m_used.fill(0, usedVars.size());
+ m_free.reserveCapacity(usedVars.size());
+ for (size_t i = usedVars.size(); i-- > 0;) {
+ if (usedVars.get(i)) {
+ m_used[i] = max(); // This is mostly for debugging and sanity.
+ m_highWatermark = std::max(m_highWatermark, static_cast<unsigned>(i) + 1);
+ } else
+ m_free.append(i);
+ }
}
#if ENABLE(DFG_CONSISTENCY_CHECK)
~ScoreBoard()
{
- // Every VirtualRegister that was allocated should now be free.
- ASSERT(m_used.size() == m_free.size());
- // Every entry in the used list should be available in the free list.
- for (size_t i = 0; i < m_used.size(); ++i)
- ASSERT(m_free.contains(i));
// For every entry in the used list the use count of the virtual register should be zero.
for (size_t i = 0; i < m_free.size(); ++i)
- ASSERT(!m_used[i]);
+ ASSERT(!m_used[i] || m_used[i] == max());
}
#endif
@@ -71,13 +76,15 @@
m_free.removeLast();
// Use count must have hit zero for it to have been added to the free list!
ASSERT(!m_used[index]);
- return (VirtualRegister)(m_firstTemporary + index);
+ m_highWatermark = std::max(m_highWatermark, static_cast<unsigned>(index) + 1);
+ return (VirtualRegister)index;
}
// Allocate a new VirtualRegister, and add a corresponding entry to m_used.
- size_t next = allocatedCount();
+ size_t next = m_used.size();
m_used.append(0);
- return (VirtualRegister)(m_firstTemporary + next);
+ m_highWatermark = std::max(m_highWatermark, static_cast<unsigned>(next) + 1);
+ return (VirtualRegister)next;
}
// Increment the usecount for the VirtualRegsiter associated with 'child',
@@ -89,7 +96,8 @@
// Find the virtual register number for this child, increment its use count.
Node& node = m_graph[child];
- uint32_t index = node.virtualRegister() - m_firstTemporary;
+ uint32_t index = node.virtualRegister();
+ ASSERT(m_used[index] != max());
if (node.refCount() == ++m_used[index]) {
// If the use count in the scoreboard reaches the use count for the node,
// then this was its last use; the virtual register is now free.
@@ -99,27 +107,31 @@
}
}
- unsigned allocatedCount()
+ unsigned highWatermark()
{
- // m_used contains an entry for every allocated VirtualRegister.
- return m_used.size();
+ return m_highWatermark;
}
-
+
#ifndef NDEBUG
void dump()
{
printf(" USED: [ ");
for (unsigned i = 0; i < m_used.size(); ++i) {
- if (!m_free.contains(i))
- printf("%d:%d ", m_firstTemporary + i, m_used[i]);
+ if (!m_free.contains(i)) {
+ printf("%d:", i);
+ if (m_used[i] == max())
+ printf("local ");
+ else
+ printf("%d ", m_used[i]);
+ }
}
printf("]\n");
printf(" FREE: [ ");
for (unsigned i = 0; i < m_used.size(); ++i) {
- if (m_free.contains(i)) {
+ if (m_free.contains(i) && m_used[i] != max()) {
ASSERT(!m_used[i]);
- printf("%d ", m_firstTemporary + i);
+ printf("%d ", i);
}
}
printf("]\n");
@@ -128,11 +140,14 @@
#endif
private:
+ static uint32_t max() { return std::numeric_limits<uint32_t>::max(); }
+
// The graph, so we can get refCounts for nodes, to determine when values are dead.
Graph& m_graph;
- // The first VirtualRegsiter to be used as a temporary.
- uint32_t m_firstTemporary;
+ // The size of the span of virtual registers that this code block will use.
+ unsigned m_highWatermark;
+
// For every virtual register that has been allocated (either currently alive, or in
// the free list), we keep a count of the number of remaining uses until it is dead
// (0, in the case of entries in the free list). Since there is an entry for every
Modified: trunk/Source/_javascript_Core/dfg/DFGSpeculativeJIT.cpp (97196 => 97197)
--- trunk/Source/_javascript_Core/dfg/DFGSpeculativeJIT.cpp 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/dfg/DFGSpeculativeJIT.cpp 2011-10-11 23:50:26 UTC (rev 97197)
@@ -82,6 +82,12 @@
case DisplacedInRegisterFile:
fprintf(out, "*%d", virtualRegister());
break;
+ case Int32DisplacedInRegisterFile:
+ fprintf(out, "*int32(%d)", virtualRegister());
+ break;
+ case DoubleDisplacedInRegisterFile:
+ fprintf(out, "*double(%d)", virtualRegister());
+ break;
case Constant:
fprintf(out, "[%s]", constant().description());
break;
Modified: trunk/Source/_javascript_Core/wtf/BitVector.cpp (97196 => 97197)
--- trunk/Source/_javascript_Core/wtf/BitVector.cpp 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/wtf/BitVector.cpp 2011-10-11 23:50:26 UTC (rev 97197)
@@ -32,13 +32,9 @@
#include <wtf/FastMalloc.h>
#include <wtf/StdLibExtras.h>
-BitVector::BitVector(const BitVector& other)
- : m_bitsOrPointer(makeInlineBits(0))
-{
- (*this) = other;
-}
+namespace WTF {
-BitVector& BitVector::operator=(const BitVector& other)
+void BitVector::setSlow(const BitVector& other)
{
uintptr_t newBitsOrPointer;
if (other.isInline())
@@ -51,7 +47,6 @@
if (!isInline())
OutOfLineBits::destroy(outOfLineBits());
m_bitsOrPointer = newBitsOrPointer;
- return *this;
}
void BitVector::resize(size_t numBits)
@@ -94,9 +89,32 @@
{
ASSERT(numBits > maxInlineBits());
OutOfLineBits* newOutOfLineBits = OutOfLineBits::create(numBits);
- memcpy(newOutOfLineBits->bits(), bits(), byteCount(std::min(size(), numBits)));
- if (!isInline())
+ if (isInline()) {
+ // Make sure that all of the bits are zero in case we do a no-op resize.
+ *newOutOfLineBits->bits() = m_bitsOrPointer & ~(static_cast<uintptr_t>(1) << maxInlineBits());
+ } else {
+ if (numBits > size()) {
+ size_t oldNumWords = outOfLineBits()->numWords();
+ size_t newNumWords = newOutOfLineBits->numWords();
+ memcpy(newOutOfLineBits->bits(), outOfLineBits()->bits(), oldNumWords * sizeof(void*));
+ memset(newOutOfLineBits->bits() + oldNumWords, 0, (newNumWords - oldNumWords) * sizeof(void*));
+ } else
+ memcpy(newOutOfLineBits->bits(), outOfLineBits()->bits(), newOutOfLineBits->numWords() * sizeof(void*));
OutOfLineBits::destroy(outOfLineBits());
+ }
m_bitsOrPointer = bitwise_cast<uintptr_t>(newOutOfLineBits);
}
+#ifndef NDEBUG
+void BitVector::dump(FILE* out)
+{
+ for (size_t i = 0; i < size(); ++i) {
+ if (get(i))
+ fprintf(out, "1");
+ else
+ fprintf(out, "-");
+ }
+}
+#endif
+
+} // namespace WTF
Modified: trunk/Source/_javascript_Core/wtf/BitVector.h (97196 => 97197)
--- trunk/Source/_javascript_Core/wtf/BitVector.h 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/wtf/BitVector.h 2011-10-11 23:50:26 UTC (rev 97197)
@@ -36,15 +36,16 @@
// to a single chunk of out-of-line allocated storage to store an arbitrary number
// of bits.
//
-// - The bitvector needs to be resized manually (just call ensureSize()).
-//
// - The bitvector remembers the bound of how many bits can be stored, but this
// may be slightly greater (by as much as some platform-specific constant)
// than the last argument passed to ensureSize().
//
+// - The bitvector can resize itself automatically (set, clear, get) or can be used
+// in a manual mode, which is faster (quickSet, quickClear, quickGet, ensureSize).
+//
// - Accesses ASSERT that you are within bounds.
//
-// - Bits are not automatically initialized to zero.
+// - Bits are automatically initialized to zero.
//
// On the other hand, this BitVector class may not be the fastest around, since
// it does conditionals on every get/set/clear. But it is great if you need to
@@ -58,8 +59,19 @@
{
}
- BitVector(const BitVector& other);
+ explicit BitVector(size_t numBits)
+ : m_bitsOrPointer(makeInlineBits(0))
+ {
+ ensureSize(numBits);
+ }
+ BitVector(const BitVector& other)
+ : m_bitsOrPointer(makeInlineBits(0))
+ {
+ (*this) = other;
+ }
+
+
~BitVector()
{
if (isInline())
@@ -67,7 +79,14 @@
OutOfLineBits::destroy(outOfLineBits());
}
- BitVector& operator=(const BitVector& other);
+ BitVector& operator=(const BitVector& other)
+ {
+ if (isInline() && other.isInline())
+ m_bitsOrPointer = other.m_bitsOrPointer;
+ else
+ setSlow(other);
+ return *this;
+ }
size_t size() const
{
@@ -88,24 +107,52 @@
void clearAll();
- bool get(size_t bit) const
+ bool quickGet(size_t bit) const
{
ASSERT(bit < size());
return !!(bits()[bit / bitsInPointer()] & (static_cast<uintptr_t>(1) << (bit & (bitsInPointer() - 1))));
}
- void set(size_t bit)
+ void quickSet(size_t bit)
{
ASSERT(bit < size());
bits()[bit / bitsInPointer()] |= (static_cast<uintptr_t>(1) << (bit & (bitsInPointer() - 1)));
}
- void clear(size_t bit)
+ void quickClear(size_t bit)
{
ASSERT(bit < size());
bits()[bit / bitsInPointer()] &= ~(static_cast<uintptr_t>(1) << (bit & (bitsInPointer() - 1)));
}
+ void quickSet(size_t bit, bool value)
+ {
+ if (value)
+ quickSet(bit);
+ else
+ quickClear(bit);
+ }
+
+ bool get(size_t bit) const
+ {
+ if (bit >= size())
+ return false;
+ return quickGet(bit);
+ }
+
+ void set(size_t bit)
+ {
+ ensureSize(bit + 1);
+ quickSet(bit);
+ }
+
+ void clear(size_t bit)
+ {
+ if (bit >= size())
+ return;
+ quickClear(bit);
+ }
+
void set(size_t bit, bool value)
{
if (value)
@@ -114,6 +161,10 @@
clear(bit);
}
+#ifndef NDEBUG
+ void dump(FILE* out);
+#endif
+
private:
static unsigned bitsInPointer()
{
@@ -162,6 +213,7 @@
OutOfLineBits* outOfLineBits() { return bitwise_cast<OutOfLineBits*>(m_bitsOrPointer); }
void resizeOutOfLine(size_t numBits);
+ void setSlow(const BitVector& other);
uintptr_t* bits()
{
Modified: trunk/Source/_javascript_Core/wtf/PackedIntVector.h (97196 => 97197)
--- trunk/Source/_javascript_Core/wtf/PackedIntVector.h 2011-10-11 23:48:51 UTC (rev 97196)
+++ trunk/Source/_javascript_Core/wtf/PackedIntVector.h 2011-10-11 23:50:26 UTC (rev 97197)
@@ -82,7 +82,7 @@
uintptr_t result = 0;
for (unsigned subIndex = 0; subIndex < bitCount; ++subIndex) {
result <<= 1;
- result |= (m_bits.get(index * bitCount + subIndex) ? 1 : 0);
+ result |= (m_bits.quickGet(index * bitCount + subIndex) ? 1 : 0);
}
return static_cast<T>(result);
}
@@ -98,7 +98,7 @@
ASSERT((myValue & mask()) == myValue);
for (unsigned subIndex = bitCount; subIndex-- > 0;) {
- m_bits.set(index * bitCount + subIndex, !!(myValue & 1));
+ m_bits.quickSet(index * bitCount + subIndex, !!(myValue & 1));
myValue >>= 1;
}