Modified: trunk/Source/_javascript_Core/ChangeLog (215056 => 215057)
--- trunk/Source/_javascript_Core/ChangeLog 2017-04-06 20:38:38 UTC (rev 215056)
+++ trunk/Source/_javascript_Core/ChangeLog 2017-04-06 20:58:34 UTC (rev 215057)
@@ -1,3 +1,35 @@
+2017-04-06 Filip Pizlo <fpi...@apple.com>
+
+ B3 -O1 should generate better code than -O0
+ https://bugs.webkit.org/show_bug.cgi?id=170563
+
+ Reviewed by Michael Saboff.
+
+ Prior to this change, code generated by -O1 ran slower than code generated by -O0. This turned
+ out to be because of reduceStrength optimizations that increase live ranges and create register
+ pressure, which then creates problems for linear scan.
+
+ It seemed obvious that canonicalizations that help isel, constant folding, and one-for-one
+ strength reductions should stay. It also seemed obvious that SSA and CFG simplification are fast
+ and harmless. So, I focused on removing:
+
+ - CSE, which increases live ranges. This is a risky optimization when we know that we've chosen
+ to use a bad register allocator.
+
+ - Sophisticated strength reductions that create more code, like the insane division optimization.
+
+ - Anything that inserts basic blocks.
+
+ CSE appeared to be the cause of half of the throughput regression of -O1 but none of the compile
+ time. This change also reduces the running time of reduceStrength by making it not a fixpoint at
+ optLevel<2.
+
+ This makes wasm -O1 compile 17% faster. This makes wasm -O1 run 19% faster. This makes -O1 code
+ run 3% faster than -O0, and compile about 4% slower than -O0. We may yet end up choosing to use
+ -O0, but at least now -O1 isn't totally useless.
+
+ * b3/B3ReduceStrength.cpp:
+
2017-04-06 Jon Davis <j...@apple.com>
Updates feature status for recently shipped features
Modified: trunk/Source/_javascript_Core/b3/B3ReduceStrength.cpp (215056 => 215057)
--- trunk/Source/_javascript_Core/b3/B3ReduceStrength.cpp 2017-04-06 20:38:38 UTC (rev 215056)
+++ trunk/Source/_javascript_Core/b3/B3ReduceStrength.cpp 2017-04-06 20:58:34 UTC (rev 215057)
@@ -442,9 +442,11 @@
simplifySSA();
- m_proc.resetValueOwners();
- m_dominators = &m_proc.dominators(); // Recompute if necessary.
- m_pureCSE.clear();
+ if (m_proc.optLevel() >= 2) {
+ m_proc.resetValueOwners();
+ m_dominators = &m_proc.dominators(); // Recompute if necessary.
+ m_pureCSE.clear();
+ }
for (BasicBlock* block : m_proc.blocksInPreOrder()) {
m_block = block;
@@ -457,23 +459,25 @@
}
m_value = m_block->at(m_index);
m_value->performSubstitution();
-
reduceValueStrength();
- replaceIfRedundant();
+ if (m_proc.optLevel() >= 2)
+ replaceIfRedundant();
}
m_insertionSet.execute(m_block);
}
m_changedCFG |= m_blockInsertionSet.execute();
- if (m_changedCFG) {
- m_proc.resetReachability();
- m_proc.invalidateCFG();
- m_dominators = nullptr; // Dominators are not valid anymore, and we don't need them yet.
- m_changed = true;
- }
+ handleChangedCFGIfNecessary();
result |= m_changed;
- } while (m_changed);
+ } while (m_changed && m_proc.optLevel() >= 2);
+
+ if (m_proc.optLevel() < 2) {
+ m_changedCFG = false;
+ simplifyCFG();
+ handleChangedCFGIfNecessary();
+ }
+
return result;
}
@@ -726,6 +730,9 @@
if (m_value->type() != Int32)
break;
+
+ if (m_proc.optLevel() < 2)
+ break;
int32_t divisor = m_value->child(1)->asInt32();
DivisionMagic<int32_t> magic = computeDivisionMagic(divisor);
@@ -821,6 +828,9 @@
break;
default:
+ if (m_proc.optLevel() < 2)
+ break;
+
// Turn this: Mod(N, D)
// Into this: Sub(N, Mul(Div(N, D), D))
//
@@ -1841,6 +1851,9 @@
checkValue->child(0) = checkValue->child(0)->child(0);
m_changed = true;
}
+
+ if (m_proc.optLevel() < 2)
+ break;
// If we are checking some bounded-size SSA _expression_ that leads to a Select that
// has a constant as one of its results, then turn the Select into a Branch and split
@@ -1947,17 +1960,19 @@
break;
}
- // If a check for the same property dominates us, we can kill the branch. This sort
- // of makes sense here because it's cheap, but hacks like this show that we're going
- // to need SCCP.
- Value* check = m_pureCSE.findMatch(
- ValueKey(Check, Void, m_value->child(0)), m_block, *m_dominators);
- if (check) {
- // The Check would have side-exited if child(0) was non-zero. So, it must be
- // zero here.
- m_block->taken().block()->removePredecessor(m_block);
- m_value->replaceWithJump(m_block, m_block->notTaken());
- m_changedCFG = true;
+ if (m_proc.optLevel() >= 2) {
+ // If a check for the same property dominates us, we can kill the branch. This sort
+ // of makes sense here because it's cheap, but hacks like this show that we're going
+ // to need SCCP.
+ Value* check = m_pureCSE.findMatch(
+ ValueKey(Check, Void, m_value->child(0)), m_block, *m_dominators);
+ if (check) {
+ // The Check would have side-exited if child(0) was non-zero. So, it must be
+ // zero here.
+ m_block->taken().block()->removePredecessor(m_block);
+ m_value->replaceWithJump(m_block, m_block->notTaken());
+ m_changedCFG = true;
+ }
}
break;
}
@@ -2378,6 +2393,16 @@
dataLog(m_proc);
}
}
+
+ void handleChangedCFGIfNecessary()
+ {
+ if (m_changedCFG) {
+ m_proc.resetReachability();
+ m_proc.invalidateCFG();
+ m_dominators = nullptr; // Dominators are not valid anymore, and we don't need them yet.
+ m_changed = true;
+ }
+ }
void checkPredecessorValidity()
{