Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (192945 => 192946)
--- trunk/Source/_javascript_Core/ChangeLog 2015-12-02 17:27:37 UTC (rev 192945)
+++ trunk/Source/_javascript_Core/ChangeLog 2015-12-02 18:49:09 UTC (rev 192946)
@@ -1,3 +1,60 @@
+2015-12-02 Benjamin Poulain <[email protected]>
+
+ [JSC] Handle x86 partial register stalls in Air
+ https://bugs.webkit.org/show_bug.cgi?id=151735
+
+ Reviewed by Filip Pizlo.
+
+ This patch adds a primitive false-dependency breaking
+ algorithm to Air. We look for redefinition of the same
+ variable that is too close to a partial definition.
+
+ There is not explicit dependency tracking going on,
+ but it is pretty fast and the extra xorps added on false-positives
+ are cheap anyway.
+
+ Typically, partial register stalls appear from instructions
+ interfering with themselves in small loops. Something like:
+
+ Label0:
+ cvtsi2sdq %eax, %xmm0
+ ...
+ jmp Label0
+
+ Those are correctly detected by propagating the local distance
+ information from block to block until no unsafe chain is found.
+
+ The test testInt32ToDoublePartialRegisterStall() checks the kind
+ of cases we typically find from _javascript_.
+ The execution time is 20% faster with a register reset (which is
+ astounding since the very next instruction has a real dependency).
+
+ Future tweaks will be needed when we can run more _javascript_:
+ -Handle function calls differently.
+ -Anything with a special can have hidden instructions.
+ We need to take them into account.
+
+ * _javascript_Core.xcodeproj/project.pbxproj:
+ * assembler/MacroAssemblerX86Common.h:
+ (JSC::MacroAssemblerX86Common::moveZeroToDouble):
+ * assembler/X86Assembler.h:
+ (JSC::X86Assembler::xorps_rr):
+ (JSC::X86Assembler::xorpd_rr):
+ According to the documentation, starting with Sandy Bridge,
+ registers reset can be done in the frontend with xorps.
+
+ * b3/B3IndexSet.h:
+ (JSC::B3::IndexSet::remove):
+ * b3/air/AirFixPartialRegisterStalls.cpp: Added.
+ (JSC::B3::Air::fixPartialRegisterStalls):
+ * b3/air/AirFixPartialRegisterStalls.h: Added.
+ * b3/air/AirGenerate.cpp:
+ (JSC::B3::Air::prepareForGeneration):
+ * b3/testb3.cpp:
+ (JSC::B3::testInt32ToDoublePartialRegisterStall):
+ (JSC::B3::run):
+ * jit/FPRInfo.h:
+
2015-12-01 Yusuke Suzuki <[email protected]>
[ES6] Implement LLInt/Baseline Support for ES6 Generators and enable this feature
Modified: trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj (192945 => 192946)
--- trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2015-12-02 17:27:37 UTC (rev 192945)
+++ trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2015-12-02 18:49:09 UTC (rev 192946)
@@ -1086,6 +1086,8 @@
1ACF7377171CA6FB00C9BB1E /* Weak.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ACF7376171CA6FB00C9BB1E /* Weak.cpp */; };
2600B5A6152BAAA70091EE5F /* JSStringJoiner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2600B5A4152BAAA70091EE5F /* JSStringJoiner.cpp */; };
2600B5A7152BAAA70091EE5F /* JSStringJoiner.h in Headers */ = {isa = PBXBuildFile; fileRef = 2600B5A5152BAAA70091EE5F /* JSStringJoiner.h */; };
+ 262D85B61C0D650F006ACB61 /* AirFixPartialRegisterStalls.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 262D85B41C0D650F006ACB61 /* AirFixPartialRegisterStalls.cpp */; };
+ 262D85B71C0D650F006ACB61 /* AirFixPartialRegisterStalls.h in Headers */ = {isa = PBXBuildFile; fileRef = 262D85B51C0D650F006ACB61 /* AirFixPartialRegisterStalls.h */; };
26718BA41BE99F780052017B /* AirIteratedRegisterCoalescing.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26718BA21BE99F780052017B /* AirIteratedRegisterCoalescing.cpp */; };
26718BA51BE99F780052017B /* AirIteratedRegisterCoalescing.h in Headers */ = {isa = PBXBuildFile; fileRef = 26718BA31BE99F780052017B /* AirIteratedRegisterCoalescing.h */; };
2684D4381C00161C0081D663 /* AirLiveness.h in Headers */ = {isa = PBXBuildFile; fileRef = 2684D4371C00161C0081D663 /* AirLiveness.h */; };
@@ -3138,6 +3140,8 @@
1CAA8B4B0D32C39A0041BCFF /* _javascript_Core.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = _javascript_Core.h; sourceTree = "<group>"; };
2600B5A4152BAAA70091EE5F /* JSStringJoiner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JSStringJoiner.cpp; sourceTree = "<group>"; };
2600B5A5152BAAA70091EE5F /* JSStringJoiner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSStringJoiner.h; sourceTree = "<group>"; };
+ 262D85B41C0D650F006ACB61 /* AirFixPartialRegisterStalls.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AirFixPartialRegisterStalls.cpp; path = b3/air/AirFixPartialRegisterStalls.cpp; sourceTree = "<group>"; };
+ 262D85B51C0D650F006ACB61 /* AirFixPartialRegisterStalls.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AirFixPartialRegisterStalls.h; path = b3/air/AirFixPartialRegisterStalls.h; sourceTree = "<group>"; };
264091FA1BE2FD4100684DB2 /* AirOpcode.opcodes */ = {isa = PBXFileReference; lastKnownFileType = text; name = AirOpcode.opcodes; path = b3/air/AirOpcode.opcodes; sourceTree = "<group>"; };
26718BA21BE99F780052017B /* AirIteratedRegisterCoalescing.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AirIteratedRegisterCoalescing.cpp; path = b3/air/AirIteratedRegisterCoalescing.cpp; sourceTree = "<group>"; };
26718BA31BE99F780052017B /* AirIteratedRegisterCoalescing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AirIteratedRegisterCoalescing.h; path = b3/air/AirIteratedRegisterCoalescing.h; sourceTree = "<group>"; };
@@ -4655,6 +4659,8 @@
0FEC85511BDACDC70080FF74 /* AirCode.h */,
0F4570361BE44C910062A629 /* AirEliminateDeadCode.cpp */,
0F4570371BE44C910062A629 /* AirEliminateDeadCode.h */,
+ 262D85B41C0D650F006ACB61 /* AirFixPartialRegisterStalls.cpp */,
+ 262D85B51C0D650F006ACB61 /* AirFixPartialRegisterStalls.h */,
0FEC85521BDACDC70080FF74 /* AirFrequentedBlock.h */,
0FEC85531BDACDC70080FF74 /* AirGenerate.cpp */,
0FEC85541BDACDC70080FF74 /* AirGenerate.h */,
@@ -7841,6 +7847,7 @@
86704B8712DBA33700A9FE7B /* YarrJIT.h in Headers */,
86704B8812DBA33700A9FE7B /* YarrParser.h in Headers */,
86704B8A12DBA33700A9FE7B /* YarrPattern.h in Headers */,
+ 262D85B71C0D650F006ACB61 /* AirFixPartialRegisterStalls.h in Headers */,
86704B4312DB8A8100A9FE7B /* YarrSyntaxChecker.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
@@ -8754,6 +8761,7 @@
A1B9E23D1B4E0D6700BC7FED /* IntlCollatorPrototype.cpp in Sources */,
A1587D6D1B4DC14100D69849 /* IntlDateTimeFormat.cpp in Sources */,
A1587D6F1B4DC14100D69849 /* IntlDateTimeFormatConstructor.cpp in Sources */,
+ 262D85B61C0D650F006ACB61 /* AirFixPartialRegisterStalls.cpp in Sources */,
70B7919B1C024A46002481E2 /* JSGeneratorFunction.cpp in Sources */,
A1587D711B4DC14100D69849 /* IntlDateTimeFormatPrototype.cpp in Sources */,
A1D792FC1B43864B004516F5 /* IntlNumberFormat.cpp in Sources */,
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h (192945 => 192946)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h 2015-12-02 17:27:37 UTC (rev 192945)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h 2015-12-02 18:49:09 UTC (rev 192946)
@@ -996,7 +996,7 @@
void moveZeroToDouble(FPRegisterID reg)
{
- m_assembler.xorpd_rr(reg, reg);
+ m_assembler.xorps_rr(reg, reg);
}
Jump branchDoubleNonZero(FPRegisterID reg, FPRegisterID scratch)
Modified: trunk/Source/_javascript_Core/assembler/X86Assembler.h (192945 => 192946)
--- trunk/Source/_javascript_Core/assembler/X86Assembler.h 2015-12-02 17:27:37 UTC (rev 192945)
+++ trunk/Source/_javascript_Core/assembler/X86Assembler.h 2015-12-02 18:49:09 UTC (rev 192946)
@@ -2120,8 +2120,17 @@
m_formatter.twoByteOp(OP2_DIVSD_VsdWsd, (RegisterID)dst, base, offset);
}
+ void xorps_rr(XMMRegisterID src, XMMRegisterID dst)
+ {
+ m_formatter.twoByteOp(OP2_XORPD_VpdWpd, (RegisterID)dst, (RegisterID)src);
+ }
+
void xorpd_rr(XMMRegisterID src, XMMRegisterID dst)
{
+ if (src == dst) {
+ xorps_rr(src, dst);
+ return;
+ }
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_XORPD_VpdWpd, (RegisterID)dst, (RegisterID)src);
}
Modified: trunk/Source/_javascript_Core/b3/B3IndexSet.h (192945 => 192946)
--- trunk/Source/_javascript_Core/b3/B3IndexSet.h 2015-12-02 17:27:37 UTC (rev 192945)
+++ trunk/Source/_javascript_Core/b3/B3IndexSet.h 2015-12-02 18:49:09 UTC (rev 192946)
@@ -48,6 +48,11 @@
return !m_set.set(value->index());
}
+ bool remove(T* value)
+ {
+ return m_set.clear(value->index());
+ }
+
bool contains(T* value) const
{
if (!value)
Added: trunk/Source/_javascript_Core/b3/air/AirFixPartialRegisterStalls.cpp (0 => 192946)
--- trunk/Source/_javascript_Core/b3/air/AirFixPartialRegisterStalls.cpp (rev 0)
+++ trunk/Source/_javascript_Core/b3/air/AirFixPartialRegisterStalls.cpp 2015-12-02 18:49:09 UTC (rev 192946)
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "AirFixPartialRegisterStalls.h"
+
+#if ENABLE(B3_JIT)
+
+#include "AirBasicBlock.h"
+#include "AirCode.h"
+#include "AirInsertionSet.h"
+#include "AirInst.h"
+#include "AirInstInlines.h"
+#include "AirPhaseScope.h"
+#include "B3IndexMap.h"
+#include "B3IndexSet.h"
+#include "MacroAssembler.h"
+#include <wtf/Vector.h>
+
+namespace JSC { namespace B3 { namespace Air {
+
+namespace {
+
+bool hasPartialXmmRegUpdate(const Inst& inst)
+{
+ switch (inst.opcode) {
+ case ConvertInt32ToDouble:
+ case ConvertInt64ToDouble:
+ case SqrtDouble:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool isDependencyBreaking(const Inst& inst)
+{
+ // "xorps reg, reg" is used by the frontend to remove the dependency on its argument.
+ return inst.opcode == MoveZeroToDouble;
+}
+
+// FIXME: find a good distance per architecture experimentally.
+// LLVM uses a distance of 16 but that comes from Nehalem.
+unsigned char minimumSafeDistance = 16;
+
+struct FPDefDistance {
+ FPDefDistance()
+ {
+ for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i)
+ distance[i] = 255;
+ }
+
+ void reset(FPRReg reg)
+ {
+ unsigned index = MacroAssembler::fpRegisterIndex(reg);
+ distance[index] = 255;
+ }
+
+ void add(FPRReg reg, unsigned registerDistance)
+ {
+ unsigned index = MacroAssembler::fpRegisterIndex(reg);
+ if (registerDistance < distance[index])
+ distance[index] = static_cast<unsigned char>(registerDistance);
+ }
+
+ bool updateFromPrecessor(FPDefDistance& precessorDistance, unsigned constantOffset = 0)
+ {
+ bool changed = false;
+ for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) {
+ unsigned regDistance = precessorDistance.distance[i] + constantOffset;
+ if (regDistance < minimumSafeDistance && regDistance < distance[i]) {
+ distance[i] = regDistance;
+ changed = true;
+ }
+ }
+ return changed;
+ }
+
+ unsigned char distance[MacroAssembler::numberOfFPRegisters()];
+};
+
+void updateDistances(Inst& inst, FPDefDistance& localDistance, unsigned& distanceToBlockEnd)
+{
+ --distanceToBlockEnd;
+
+ if (isDependencyBreaking(inst)) {
+ localDistance.reset(inst.args[0].tmp().fpr());
+ return;
+ }
+
+ inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type) {
+ ASSERT_WITH_MESSAGE(tmp.isReg(), "This phase must be run after register allocation.");
+
+ if (tmp.isFPR() && Arg::isDef(role))
+ localDistance.add(tmp.fpr(), distanceToBlockEnd);
+ });
+}
+
+}
+
+void fixPartialRegisterStalls(Code& code)
+{
+ if (!isX86())
+ return;
+
+ PhaseScope phaseScope(code, "fixPartialRegisterStalls");
+
+ Vector<BasicBlock*> candidates;
+
+ for (BasicBlock* block : code) {
+ for (const Inst& inst : *block) {
+ if (hasPartialXmmRegUpdate(inst)) {
+ candidates.append(block);
+ break;
+ }
+ }
+ }
+
+ // Fortunately, Partial Stalls are rarely used. Return early if no block
+ // cares about them.
+ if (candidates.isEmpty())
+ return;
+
+ // For each block, this provides the distance to the last instruction setting each register
+ // on block *entry*.
+ IndexMap<BasicBlock, FPDefDistance> lastDefDistance(code.size());
+
+ // Blocks with dirty distance at head.
+ IndexSet<BasicBlock> dirty;
+
+ // First, we compute the local distance for each block and push it to the successors.
+ for (BasicBlock* block : code) {
+ FPDefDistance localDistance;
+
+ unsigned distanceToBlockEnd = block->size();
+ for (Inst& inst : *block)
+ updateDistances(inst, localDistance, distanceToBlockEnd);
+
+ for (BasicBlock* successor : block->successorBlocks()) {
+ if (lastDefDistance[successor].updateFromPrecessor(localDistance))
+ dirty.add(successor);
+ }
+ }
+
+ // Now we propagate the minimums accross blocks.
+ bool changed;
+ do {
+ changed = false;
+
+ for (BasicBlock* block : code) {
+ if (!dirty.remove(block))
+ continue;
+
+ // Little shortcut: if the block is big enough, propagating it won't add any information.
+ if (block->size() >= minimumSafeDistance)
+ continue;
+
+ unsigned blockSize = block->size();
+ FPDefDistance& blockDistance = lastDefDistance[block];
+ for (BasicBlock* successor : block->successorBlocks()) {
+ if (lastDefDistance[successor].updateFromPrecessor(blockDistance, blockSize)) {
+ dirty.add(successor);
+ changed = true;
+ }
+ }
+ }
+ } while (changed);
+
+ // Finally, update each block as needed.
+ InsertionSet insertionSet(code);
+ for (BasicBlock* block : candidates) {
+ unsigned distanceToBlockEnd = block->size();
+ FPDefDistance& localDistance = lastDefDistance[block];
+
+ for (unsigned i = 0; i < block->size(); ++i) {
+ Inst& inst = block->at(i);
+
+ if (hasPartialXmmRegUpdate(inst)) {
+ RegisterSet defs;
+ RegisterSet uses;
+ inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type) {
+ if (tmp.isFPR()) {
+ if (Arg::isDef(role))
+ defs.set(tmp.fpr());
+ if (Arg::isAnyUse(role))
+ uses.set(tmp.fpr());
+ }
+ });
+ // We only care about values we define but not use. Otherwise we have to wait
+ // for the value to be resolved anyway.
+ defs.exclude(uses);
+
+ defs.forEach([&] (Reg reg) {
+ if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance)
+ insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg));
+ });
+ }
+
+ updateDistances(inst, localDistance, distanceToBlockEnd);
+ }
+ insertionSet.execute(block);
+ }
+}
+
+} } } // namespace JSC::B3::Air
+
+#endif // ENABLE(B3_JIT)
Added: trunk/Source/_javascript_Core/b3/air/AirFixPartialRegisterStalls.h (0 => 192946)
--- trunk/Source/_javascript_Core/b3/air/AirFixPartialRegisterStalls.h (rev 0)
+++ trunk/Source/_javascript_Core/b3/air/AirFixPartialRegisterStalls.h 2015-12-02 18:49:09 UTC (rev 192946)
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef AirFixPartialRegisterStalls_h
+#define AirFixPartialRegisterStalls_h
+
+#if ENABLE(B3_JIT)
+
+namespace JSC { namespace B3 { namespace Air {
+
+class Code;
+
+// x86 has a pipelining hazard caused by false dependencies between instructions.
+//
+// Some instructions update only part of a register, they can only be scheduled after
+// the previous definition is computed. This problem can be avoided by the compiler
+// by explicitely resetting the entire register before executing the instruction with
+// partial update.
+//
+// See "Partial XMM Register Stalls" and "Dependency Breaking Idioms" in the manual.
+void fixPartialRegisterStalls(Code&);
+
+} } } // namespace JSC::B3::Air
+
+#endif // ENABLE(B3_JIT)
+
+#endif // AirFixPartialRegisterStalls_h
Modified: trunk/Source/_javascript_Core/b3/air/AirGenerate.cpp (192945 => 192946)
--- trunk/Source/_javascript_Core/b3/air/AirGenerate.cpp 2015-12-02 17:27:37 UTC (rev 192945)
+++ trunk/Source/_javascript_Core/b3/air/AirGenerate.cpp 2015-12-02 18:49:09 UTC (rev 192946)
@@ -31,6 +31,7 @@
#include "AirAllocateStack.h"
#include "AirCode.h"
#include "AirEliminateDeadCode.h"
+#include "AirFixPartialRegisterStalls.h"
#include "AirGenerationContext.h"
#include "AirHandleCalleeSaves.h"
#include "AirIteratedRegisterCoalescing.h"
@@ -95,6 +96,10 @@
// frequency successor is also the fall-through target.
optimizeBlockOrder(code);
+ // Attempt to remove false dependencies between instructions created by partial register changes.
+ // This must be executed as late as possible as it depends on the instructions order and register use.
+ fixPartialRegisterStalls(code);
+
// This is needed to satisfy a requirement of B3::StackmapValue.
reportUsedRegisters(code);
Modified: trunk/Source/_javascript_Core/b3/testb3.cpp (192945 => 192946)
--- trunk/Source/_javascript_Core/b3/testb3.cpp 2015-12-02 17:27:37 UTC (rev 192945)
+++ trunk/Source/_javascript_Core/b3/testb3.cpp 2015-12-02 18:49:09 UTC (rev 192946)
@@ -2667,6 +2667,105 @@
compileAndRun<double>(proc, 1.1, 2.5);
}
+void testInt32ToDoublePartialRegisterStall()
+{
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+ BasicBlock* loop = proc.addBlock();
+ BasicBlock* done = proc.addBlock();
+
+ // Head.
+ Value* total = root->appendNew<ConstDoubleValue>(proc, Origin(), 0.);
+ Value* counter = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+ UpsilonValue* originalTotal = root->appendNew<UpsilonValue>(proc, Origin(), total);
+ UpsilonValue* originalCounter = root->appendNew<UpsilonValue>(proc, Origin(), counter);
+ root->appendNew<ControlValue>(proc, Jump, Origin(), FrequentedBlock(loop));
+
+ // Loop.
+ Value* loopCounter = loop->appendNew<Value>(proc, Phi, Int64, Origin());
+ Value* loopTotal = loop->appendNew<Value>(proc, Phi, Double, Origin());
+ originalCounter->setPhi(loopCounter);
+ originalTotal->setPhi(loopTotal);
+
+ Value* truncatedCounter = loop->appendNew<Value>(proc, Trunc, Origin(), loopCounter);
+ Value* doubleCounter = loop->appendNew<Value>(proc, IToD, Origin(), truncatedCounter);
+ Value* updatedTotal = loop->appendNew<Value>(proc, Add, Origin(), doubleCounter, loopTotal);
+ UpsilonValue* updatedTotalUpsilon = loop->appendNew<UpsilonValue>(proc, Origin(), updatedTotal);
+ updatedTotalUpsilon->setPhi(loopTotal);
+
+ Value* decCounter = loop->appendNew<Value>(proc, Sub, Origin(), loopCounter, loop->appendNew<Const64Value>(proc, Origin(), 1));
+ UpsilonValue* decCounterUpsilon = loop->appendNew<UpsilonValue>(proc, Origin(), decCounter);
+ decCounterUpsilon->setPhi(loopCounter);
+ loop->appendNew<ControlValue>(
+ proc, Branch, Origin(),
+ decCounter,
+ FrequentedBlock(loop), FrequentedBlock(done));
+
+ // Tail.
+ done->appendNew<ControlValue>(proc, Return, Origin(), updatedTotal);
+ CHECK(isIdentical(compileAndRun<double>(proc, 100000), 5000050000.));
+}
+
+void testInt32ToDoublePartialRegisterWithoutStall()
+{
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+ BasicBlock* loop = proc.addBlock();
+ BasicBlock* done = proc.addBlock();
+
+ // Head.
+ Value* total = root->appendNew<ConstDoubleValue>(proc, Origin(), 0.);
+ Value* counter = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+ UpsilonValue* originalTotal = root->appendNew<UpsilonValue>(proc, Origin(), total);
+ UpsilonValue* originalCounter = root->appendNew<UpsilonValue>(proc, Origin(), counter);
+ uint64_t forPaddingInput;
+ Value* forPaddingInputAddress = root->appendNew<ConstPtrValue>(proc, Origin(), &forPaddingInput);
+ uint64_t forPaddingOutput;
+ Value* forPaddingOutputAddress = root->appendNew<ConstPtrValue>(proc, Origin(), &forPaddingOutput);
+ root->appendNew<ControlValue>(proc, Jump, Origin(), FrequentedBlock(loop));
+
+ // Loop.
+ Value* loopCounter = loop->appendNew<Value>(proc, Phi, Int64, Origin());
+ Value* loopTotal = loop->appendNew<Value>(proc, Phi, Double, Origin());
+ originalCounter->setPhi(loopCounter);
+ originalTotal->setPhi(loopTotal);
+
+ Value* truncatedCounter = loop->appendNew<Value>(proc, Trunc, Origin(), loopCounter);
+ Value* doubleCounter = loop->appendNew<Value>(proc, IToD, Origin(), truncatedCounter);
+ Value* updatedTotal = loop->appendNew<Value>(proc, Add, Origin(), doubleCounter, loopTotal);
+
+ // Add enough padding instructions to avoid a stall.
+ Value* loadPadding = loop->appendNew<MemoryValue>(proc, Load, Int64, Origin(), forPaddingInputAddress);
+ Value* padding = loop->appendNew<Value>(proc, BitXor, Origin(), loadPadding, loopCounter);
+ padding = loop->appendNew<Value>(proc, Add, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, BitOr, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, Sub, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, BitXor, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, Add, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, BitOr, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, Sub, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, BitXor, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, Add, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, BitOr, Origin(), padding, loopCounter);
+ padding = loop->appendNew<Value>(proc, Sub, Origin(), padding, loopCounter);
+ loop->appendNew<MemoryValue>(proc, Store, Origin(), padding, forPaddingOutputAddress);
+
+ UpsilonValue* updatedTotalUpsilon = loop->appendNew<UpsilonValue>(proc, Origin(), updatedTotal);
+ updatedTotalUpsilon->setPhi(loopTotal);
+
+ Value* decCounter = loop->appendNew<Value>(proc, Sub, Origin(), loopCounter, loop->appendNew<Const64Value>(proc, Origin(), 1));
+ UpsilonValue* decCounterUpsilon = loop->appendNew<UpsilonValue>(proc, Origin(), decCounter);
+ decCounterUpsilon->setPhi(loopCounter);
+ loop->appendNew<ControlValue>(
+ proc, Branch, Origin(),
+ decCounter,
+ FrequentedBlock(loop), FrequentedBlock(done));
+
+ // Tail.
+ done->appendNew<ControlValue>(proc, Return, Origin(), updatedTotal);
+ CHECK(isIdentical(compileAndRun<double>(proc, 100000), 5000050000.));
+}
+
void testBranch()
{
Procedure proc;
@@ -5888,6 +5987,9 @@
RUN(testSpillGP());
RUN(testSpillFP());
+ RUN(testInt32ToDoublePartialRegisterStall());
+ RUN(testInt32ToDoublePartialRegisterWithoutStall());
+
RUN(testCallSimple(1, 2));
RUN(testCallFunctionWithHellaArguments());