Title: [215057] trunk/Source/_javascript_Core
Revision
215057
Author
fpi...@apple.com
Date
2017-04-06 13:58:34 -0700 (Thu, 06 Apr 2017)

Log Message

B3 -O1 should generate better code than -O0
https://bugs.webkit.org/show_bug.cgi?id=170563

Reviewed by Michael Saboff.
        
Prior to this change, code generated by -O1 ran slower than code generated by -O0. This turned
out to be because of reduceStrength optimizations that increase live ranges and create register
pressure, which then creates problems for linear scan.
        
It seemed obvious that canonicalizations that help isel, constant folding, and one-for-one
strength reductions should stay. It also seemed obvious that SSA and CFG simplification are fast
and harmless. So, I focused on removing:
        
- CSE, which increases live ranges. This is a risky optimization when we know that we've chosen
  to use a bad register allocator.
        
- Sophisticated strength reductions that create more code, like the insane division optimization.
        
- Anything that inserts basic blocks.
        
CSE appeared to be the cause of half of the throughput regression of -O1 but none of the compile
time. This change also reduces the running time of reduceStrength by making it not a fixpoint at
optLevel<2.
        
This makes wasm -O1 compile 17% faster. This makes wasm -O1 run 19% faster. This makes -O1 code
run 3% faster than -O0, and compile about 4% slower than -O0. We may yet end up choosing to use
-O0, but at least now -O1 isn't totally useless.

* b3/B3ReduceStrength.cpp:

Modified Paths

Diff

Modified: trunk/Source/_javascript_Core/ChangeLog (215056 => 215057)


--- trunk/Source/_javascript_Core/ChangeLog	2017-04-06 20:38:38 UTC (rev 215056)
+++ trunk/Source/_javascript_Core/ChangeLog	2017-04-06 20:58:34 UTC (rev 215057)
@@ -1,3 +1,35 @@
+2017-04-06  Filip Pizlo  <fpi...@apple.com>
+
+        B3 -O1 should generate better code than -O0
+        https://bugs.webkit.org/show_bug.cgi?id=170563
+
+        Reviewed by Michael Saboff.
+        
+        Prior to this change, code generated by -O1 ran slower than code generated by -O0. This turned
+        out to be because of reduceStrength optimizations that increase live ranges and create register
+        pressure, which then creates problems for linear scan.
+        
+        It seemed obvious that canonicalizations that help isel, constant folding, and one-for-one
+        strength reductions should stay. It also seemed obvious that SSA and CFG simplification are fast
+        and harmless. So, I focused on removing:
+        
+        - CSE, which increases live ranges. This is a risky optimization when we know that we've chosen
+          to use a bad register allocator.
+        
+        - Sophisticated strength reductions that create more code, like the insane division optimization.
+        
+        - Anything that inserts basic blocks.
+        
+        CSE appeared to be the cause of half of the throughput regression of -O1 but none of the compile
+        time. This change also reduces the running time of reduceStrength by making it not a fixpoint at
+        optLevel<2.
+        
+        This makes wasm -O1 compile 17% faster. This makes wasm -O1 run 19% faster. This makes -O1 code
+        run 3% faster than -O0, and compile about 4% slower than -O0. We may yet end up choosing to use
+        -O0, but at least now -O1 isn't totally useless.
+
+        * b3/B3ReduceStrength.cpp:
+
 2017-04-06  Jon Davis  <j...@apple.com>
 
         Updates feature status for recently shipped features

Modified: trunk/Source/_javascript_Core/b3/B3ReduceStrength.cpp (215056 => 215057)


--- trunk/Source/_javascript_Core/b3/B3ReduceStrength.cpp	2017-04-06 20:38:38 UTC (rev 215056)
+++ trunk/Source/_javascript_Core/b3/B3ReduceStrength.cpp	2017-04-06 20:58:34 UTC (rev 215057)
@@ -442,9 +442,11 @@
             
             simplifySSA();
             
-            m_proc.resetValueOwners();
-            m_dominators = &m_proc.dominators(); // Recompute if necessary.
-            m_pureCSE.clear();
+            if (m_proc.optLevel() >= 2) {
+                m_proc.resetValueOwners();
+                m_dominators = &m_proc.dominators(); // Recompute if necessary.
+                m_pureCSE.clear();
+            }
 
             for (BasicBlock* block : m_proc.blocksInPreOrder()) {
                 m_block = block;
@@ -457,23 +459,25 @@
                     }
                     m_value = m_block->at(m_index);
                     m_value->performSubstitution();
-                    
                     reduceValueStrength();
-                    replaceIfRedundant();
+                    if (m_proc.optLevel() >= 2)
+                        replaceIfRedundant();
                 }
                 m_insertionSet.execute(m_block);
             }
 
             m_changedCFG |= m_blockInsertionSet.execute();
-            if (m_changedCFG) {
-                m_proc.resetReachability();
-                m_proc.invalidateCFG();
-                m_dominators = nullptr; // Dominators are not valid anymore, and we don't need them yet.
-                m_changed = true;
-            }
+            handleChangedCFGIfNecessary();
             
             result |= m_changed;
-        } while (m_changed);
+        } while (m_changed && m_proc.optLevel() >= 2);
+        
+        if (m_proc.optLevel() < 2) {
+            m_changedCFG = false;
+            simplifyCFG();
+            handleChangedCFGIfNecessary();
+        }
+        
         return result;
     }
     
@@ -726,6 +730,9 @@
 
                     if (m_value->type() != Int32)
                         break;
+                    
+                    if (m_proc.optLevel() < 2)
+                        break;
 
                     int32_t divisor = m_value->child(1)->asInt32();
                     DivisionMagic<int32_t> magic = computeDivisionMagic(divisor);
@@ -821,6 +828,9 @@
                     break;
 
                 default:
+                    if (m_proc.optLevel() < 2)
+                        break;
+                    
                     // Turn this: Mod(N, D)
                     // Into this: Sub(N, Mul(Div(N, D), D))
                     //
@@ -1841,6 +1851,9 @@
                 checkValue->child(0) = checkValue->child(0)->child(0);
                 m_changed = true;
             }
+            
+            if (m_proc.optLevel() < 2)
+                break;
 
             // If we are checking some bounded-size SSA _expression_ that leads to a Select that
             // has a constant as one of its results, then turn the Select into a Branch and split
@@ -1947,17 +1960,19 @@
                 break;
             }
 
-            // If a check for the same property dominates us, we can kill the branch. This sort
-            // of makes sense here because it's cheap, but hacks like this show that we're going
-            // to need SCCP.
-            Value* check = m_pureCSE.findMatch(
-                ValueKey(Check, Void, m_value->child(0)), m_block, *m_dominators);
-            if (check) {
-                // The Check would have side-exited if child(0) was non-zero. So, it must be
-                // zero here.
-                m_block->taken().block()->removePredecessor(m_block);
-                m_value->replaceWithJump(m_block, m_block->notTaken());
-                m_changedCFG = true;
+            if (m_proc.optLevel() >= 2) {
+                // If a check for the same property dominates us, we can kill the branch. This sort
+                // of makes sense here because it's cheap, but hacks like this show that we're going
+                // to need SCCP.
+                Value* check = m_pureCSE.findMatch(
+                    ValueKey(Check, Void, m_value->child(0)), m_block, *m_dominators);
+                if (check) {
+                    // The Check would have side-exited if child(0) was non-zero. So, it must be
+                    // zero here.
+                    m_block->taken().block()->removePredecessor(m_block);
+                    m_value->replaceWithJump(m_block, m_block->notTaken());
+                    m_changedCFG = true;
+                }
             }
             break;
         }
@@ -2378,6 +2393,16 @@
             dataLog(m_proc);
         }
     }
+    
+    void handleChangedCFGIfNecessary()
+    {
+        if (m_changedCFG) {
+            m_proc.resetReachability();
+            m_proc.invalidateCFG();
+            m_dominators = nullptr; // Dominators are not valid anymore, and we don't need them yet.
+            m_changed = true;
+        }
+    }
 
     void checkPredecessorValidity()
     {
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to